1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 8 9 /* if range is true, it returns B s.t. span{B} = range(A) 10 if range is false, it returns B s.t. range(B) _|_ range(A) */ 11 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 12 { 13 #if !defined(PETSC_USE_COMPLEX) 14 PetscScalar *uwork,*data,*U, ds = 0.; 15 PetscReal *sing; 16 PetscBLASInt bM,bN,lwork,lierr,di = 1; 17 PetscInt ulw,i,nr,nc,n; 18 PetscErrorCode ierr; 19 20 PetscFunctionBegin; 21 #if defined(PETSC_MISSING_LAPACK_GESVD) 22 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 23 #else 24 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 25 if (!nr || !nc) PetscFunctionReturn(0); 26 27 /* workspace */ 28 if (!work) { 29 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 30 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 31 } else { 32 ulw = lw; 33 uwork = work; 34 } 35 n = PetscMin(nr,nc); 36 if (!rwork) { 37 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 38 } else { 39 sing = rwork; 40 } 41 42 /* SVD */ 43 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 44 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 46 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 47 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 48 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 49 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 50 ierr = PetscFPTrapPop();CHKERRQ(ierr); 51 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 52 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 53 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 54 if (!rwork) { 55 ierr = PetscFree(sing);CHKERRQ(ierr); 56 } 57 if (!work) { 58 ierr = PetscFree(uwork);CHKERRQ(ierr); 59 } 60 /* create B */ 61 if (!range) { 62 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 63 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 64 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 65 } else { 66 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 67 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 68 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 69 } 70 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 71 ierr = PetscFree(U);CHKERRQ(ierr); 72 #endif 73 #else /* PETSC_USE_COMPLEX */ 74 PetscFunctionBegin; 75 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 76 #endif 77 PetscFunctionReturn(0); 78 } 79 80 /* TODO REMOVE */ 81 #if defined(PRINT_GDET) 82 static int inc = 0; 83 static int lev = 0; 84 #endif 85 86 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 87 { 88 PetscErrorCode ierr; 89 Mat GE,GEd; 90 PetscInt rsize,csize,esize; 91 PetscScalar *ptr; 92 93 PetscFunctionBegin; 94 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 95 if (!esize) PetscFunctionReturn(0); 96 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 97 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 98 99 /* gradients */ 100 ptr = work + 5*esize; 101 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 102 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 103 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 104 ierr = MatDestroy(&GE);CHKERRQ(ierr); 105 106 /* constants */ 107 ptr += rsize*csize; 108 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 109 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 110 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 111 ierr = MatDestroy(&GE);CHKERRQ(ierr); 112 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 113 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 114 115 if (corners) { 116 Mat GEc; 117 PetscScalar *vals,v; 118 119 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 120 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 121 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 122 /* v = PetscAbsScalar(vals[0]) */; 123 v = 1.; 124 cvals[0] = vals[0]/v; 125 cvals[1] = vals[1]/v; 126 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 127 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 128 #if defined(PRINT_GDET) 129 { 130 PetscViewer viewer; 131 char filename[256]; 132 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 133 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 134 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 135 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 136 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 137 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 138 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 139 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 140 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 141 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 142 } 143 #endif 144 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 145 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 146 } 147 148 PetscFunctionReturn(0); 149 } 150 151 PetscErrorCode PCBDDCNedelecSupport(PC pc) 152 { 153 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 154 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 155 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 156 Vec tvec; 157 PetscSF sfv; 158 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 159 MPI_Comm comm; 160 IS lned,primals,allprimals,nedfieldlocal; 161 IS *eedges,*extrows,*extcols,*alleedges; 162 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 163 PetscScalar *vals,*work; 164 PetscReal *rwork; 165 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 166 PetscInt ne,nv,Lv,order,n,field; 167 PetscInt n_neigh,*neigh,*n_shared,**shared; 168 PetscInt i,j,extmem,cum,maxsize,nee; 169 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 170 PetscInt *sfvleaves,*sfvroots; 171 PetscInt *corners,*cedges; 172 PetscInt *ecount,**eneighs,*vcount,**vneighs; 173 #if defined(PETSC_USE_DEBUG) 174 PetscInt *emarks; 175 #endif 176 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 177 PetscErrorCode ierr; 178 179 PetscFunctionBegin; 180 /* If the discrete gradient is defined for a subset of dofs and global is true, 181 it assumes G is given in global ordering for all the dofs. 182 Otherwise, the ordering is global for the Nedelec field */ 183 order = pcbddc->nedorder; 184 conforming = pcbddc->conforming; 185 field = pcbddc->nedfield; 186 global = pcbddc->nedglobal; 187 setprimal = PETSC_FALSE; 188 print = PETSC_FALSE; 189 singular = PETSC_FALSE; 190 191 /* Command line customization */ 192 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 193 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 194 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 195 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 196 /* print debug info TODO: to be removed */ 197 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 198 ierr = PetscOptionsEnd();CHKERRQ(ierr); 199 200 /* Return if there are no edges in the decomposition and the problem is not singular */ 201 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 202 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 203 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 204 if (!singular) { 205 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 206 lrc[0] = PETSC_FALSE; 207 for (i=0;i<n;i++) { 208 if (PetscRealPart(vals[i]) > 2.) { 209 lrc[0] = PETSC_TRUE; 210 break; 211 } 212 } 213 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 214 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 215 if (!lrc[1]) PetscFunctionReturn(0); 216 } 217 218 /* Get Nedelec field */ 219 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 220 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 221 if (pcbddc->n_ISForDofsLocal && field >= 0) { 222 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 223 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 224 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 225 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 226 ne = n; 227 nedfieldlocal = NULL; 228 global = PETSC_TRUE; 229 } else if (field == PETSC_DECIDE) { 230 PetscInt rst,ren,*idx; 231 232 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 233 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 234 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 235 for (i=rst;i<ren;i++) { 236 PetscInt nc; 237 238 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 239 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 240 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 241 } 242 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 243 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 244 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 245 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 246 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 247 } else { 248 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 249 } 250 251 /* Sanity checks */ 252 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 253 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 254 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 255 256 /* Just set primal dofs and return */ 257 if (setprimal) { 258 IS enedfieldlocal; 259 PetscInt *eidxs; 260 261 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 262 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 263 if (nedfieldlocal) { 264 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 265 for (i=0,cum=0;i<ne;i++) { 266 if (PetscRealPart(vals[idxs[i]]) > 2.) { 267 eidxs[cum++] = idxs[i]; 268 } 269 } 270 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 271 } else { 272 for (i=0,cum=0;i<ne;i++) { 273 if (PetscRealPart(vals[i]) > 2.) { 274 eidxs[cum++] = i; 275 } 276 } 277 } 278 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 279 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 280 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 281 ierr = PetscFree(eidxs);CHKERRQ(ierr); 282 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 283 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 284 PetscFunctionReturn(0); 285 } 286 287 /* Compute some l2g maps */ 288 if (nedfieldlocal) { 289 IS is; 290 291 /* need to map from the local Nedelec field to local numbering */ 292 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 293 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 294 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 295 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 296 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 297 if (global) { 298 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 299 el2g = al2g; 300 } else { 301 IS gis; 302 303 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 304 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 305 ierr = ISDestroy(&gis);CHKERRQ(ierr); 306 } 307 ierr = ISDestroy(&is);CHKERRQ(ierr); 308 } else { 309 /* restore default */ 310 pcbddc->nedfield = -1; 311 /* one ref for the destruction of al2g, one for el2g */ 312 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 313 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 314 el2g = al2g; 315 fl2g = NULL; 316 } 317 318 /* Start communication to drop connections for interior edges (for cc analysis only) */ 319 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 320 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 321 if (nedfieldlocal) { 322 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 323 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 324 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 325 } else { 326 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 327 } 328 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 329 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 330 331 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 332 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 333 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 334 if (global) { 335 PetscInt rst; 336 337 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 338 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 339 if (matis->sf_rootdata[i] < 2) { 340 matis->sf_rootdata[cum++] = i + rst; 341 } 342 } 343 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 344 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 345 } else { 346 PetscInt *tbz; 347 348 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 349 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 350 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 351 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 352 for (i=0,cum=0;i<ne;i++) 353 if (matis->sf_leafdata[idxs[i]] == 1) 354 tbz[cum++] = i; 355 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 356 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 357 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 358 ierr = PetscFree(tbz);CHKERRQ(ierr); 359 } 360 } else { /* we need the entire G to infer the nullspace */ 361 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 362 G = pcbddc->discretegradient; 363 } 364 365 /* Extract subdomain relevant rows of G */ 366 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 367 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 368 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 369 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 370 ierr = ISDestroy(&lned);CHKERRQ(ierr); 371 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 372 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 373 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 374 375 /* SF for nodal dofs communications */ 376 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 377 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 378 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 379 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 380 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 381 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 382 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 383 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 384 i = singular ? 2 : 1; 385 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 386 387 /* Destroy temporary G created in MATIS format and modified G */ 388 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 389 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 390 ierr = MatDestroy(&G);CHKERRQ(ierr); 391 392 if (print) { 393 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 394 ierr = MatView(lG,NULL);CHKERRQ(ierr); 395 } 396 397 /* Save lG for values insertion in change of basis */ 398 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 399 400 /* Analyze the edge-nodes connections (duplicate lG) */ 401 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 402 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 403 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 404 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 405 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 406 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 407 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 408 /* need to import the boundary specification to ensure the 409 proper detection of coarse edges' endpoints */ 410 if (pcbddc->DirichletBoundariesLocal) { 411 IS is; 412 413 if (fl2g) { 414 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 415 } else { 416 is = pcbddc->DirichletBoundariesLocal; 417 } 418 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 419 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 420 for (i=0;i<cum;i++) { 421 if (idxs[i] >= 0) { 422 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 423 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 424 } 425 } 426 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 427 if (fl2g) { 428 ierr = ISDestroy(&is);CHKERRQ(ierr); 429 } 430 } 431 if (pcbddc->NeumannBoundariesLocal) { 432 IS is; 433 434 if (fl2g) { 435 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 436 } else { 437 is = pcbddc->NeumannBoundariesLocal; 438 } 439 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 440 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 441 for (i=0;i<cum;i++) { 442 if (idxs[i] >= 0) { 443 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 444 } 445 } 446 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 447 if (fl2g) { 448 ierr = ISDestroy(&is);CHKERRQ(ierr); 449 } 450 } 451 452 /* Count neighs per dof */ 453 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 454 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 455 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 456 for (i=1,cum=0;i<n_neigh;i++) { 457 cum += n_shared[i]; 458 for (j=0;j<n_shared[i];j++) { 459 ecount[shared[i][j]]++; 460 } 461 } 462 if (ne) { 463 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 464 } 465 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 466 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 467 for (i=1;i<n_neigh;i++) { 468 for (j=0;j<n_shared[i];j++) { 469 PetscInt k = shared[i][j]; 470 eneighs[k][ecount[k]] = neigh[i]; 471 ecount[k]++; 472 } 473 } 474 for (i=0;i<ne;i++) { 475 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 476 } 477 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 478 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 479 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 480 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 481 for (i=1,cum=0;i<n_neigh;i++) { 482 cum += n_shared[i]; 483 for (j=0;j<n_shared[i];j++) { 484 vcount[shared[i][j]]++; 485 } 486 } 487 if (nv) { 488 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 489 } 490 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 491 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 492 for (i=1;i<n_neigh;i++) { 493 for (j=0;j<n_shared[i];j++) { 494 PetscInt k = shared[i][j]; 495 vneighs[k][vcount[k]] = neigh[i]; 496 vcount[k]++; 497 } 498 } 499 for (i=0;i<nv;i++) { 500 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 501 } 502 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 503 504 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 505 for proper detection of coarse edges' endpoints */ 506 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 507 for (i=0;i<ne;i++) { 508 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 509 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 510 } 511 } 512 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 513 if (!conforming) { 514 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 515 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 516 } 517 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 518 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 519 cum = 0; 520 for (i=0;i<ne;i++) { 521 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 522 if (!PetscBTLookup(btee,i)) { 523 marks[cum++] = i; 524 continue; 525 } 526 /* set badly connected edge dofs as primal */ 527 if (!conforming) { 528 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 529 marks[cum++] = i; 530 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 531 for (j=ii[i];j<ii[i+1];j++) { 532 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 533 } 534 } else { 535 /* every edge dofs should be connected trough a certain number of nodal dofs 536 to other edge dofs belonging to coarse edges 537 - at most 2 endpoints 538 - order-1 interior nodal dofs 539 - no undefined nodal dofs (nconn < order) 540 */ 541 PetscInt ends = 0,ints = 0, undef = 0; 542 for (j=ii[i];j<ii[i+1];j++) { 543 PetscInt v = jj[j],k; 544 PetscInt nconn = iit[v+1]-iit[v]; 545 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 546 if (nconn > order) ends++; 547 else if (nconn == order) ints++; 548 else undef++; 549 } 550 if (undef || ends > 2 || ints != order -1) { 551 marks[cum++] = i; 552 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 553 for (j=ii[i];j<ii[i+1];j++) { 554 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 555 } 556 } 557 } 558 } 559 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 560 if (!order && ii[i+1] != ii[i]) { 561 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 562 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 563 } 564 } 565 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 566 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 567 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 568 if (!conforming) { 569 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 570 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 571 } 572 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 573 574 /* identify splitpoints and corner candidates */ 575 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 576 if (print) { 577 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 578 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 579 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 580 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 581 } 582 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 583 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 584 for (i=0;i<nv;i++) { 585 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 586 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 587 if (!order) { /* variable order */ 588 PetscReal vorder = 0.; 589 590 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 591 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 592 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 593 ord = 1; 594 } 595 #if defined(PETSC_USE_DEBUG) 596 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 597 #endif 598 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 599 if (PetscBTLookup(btbd,jj[j])) { 600 bdir = PETSC_TRUE; 601 break; 602 } 603 if (vc != ecount[jj[j]]) { 604 sneighs = PETSC_FALSE; 605 } else { 606 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 607 for (k=0;k<vc;k++) { 608 if (vn[k] != en[k]) { 609 sneighs = PETSC_FALSE; 610 break; 611 } 612 } 613 } 614 } 615 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 616 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 617 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 618 } else if (test == ord) { 619 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 620 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 621 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 622 } else { 623 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 624 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 625 } 626 } 627 } 628 ierr = PetscFree(ecount);CHKERRQ(ierr); 629 ierr = PetscFree(vcount);CHKERRQ(ierr); 630 if (ne) { 631 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 632 } 633 if (nv) { 634 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 635 } 636 ierr = PetscFree(eneighs);CHKERRQ(ierr); 637 ierr = PetscFree(vneighs);CHKERRQ(ierr); 638 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 639 640 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 641 if (order != 1) { 642 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 643 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 644 for (i=0;i<nv;i++) { 645 if (PetscBTLookup(btvcand,i)) { 646 PetscBool found = PETSC_FALSE; 647 for (j=ii[i];j<ii[i+1] && !found;j++) { 648 PetscInt k,e = jj[j]; 649 if (PetscBTLookup(bte,e)) continue; 650 for (k=iit[e];k<iit[e+1];k++) { 651 PetscInt v = jjt[k]; 652 if (v != i && PetscBTLookup(btvcand,v)) { 653 found = PETSC_TRUE; 654 break; 655 } 656 } 657 } 658 if (!found) { 659 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 660 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 661 } else { 662 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 663 } 664 } 665 } 666 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 667 } 668 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 669 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 670 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 671 672 /* Get the local G^T explicitly */ 673 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 674 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 675 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 676 677 /* Mark interior nodal dofs */ 678 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 679 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 680 for (i=1;i<n_neigh;i++) { 681 for (j=0;j<n_shared[i];j++) { 682 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 683 } 684 } 685 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 686 687 /* communicate corners and splitpoints */ 688 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 689 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 690 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 691 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 692 693 if (print) { 694 IS tbz; 695 696 cum = 0; 697 for (i=0;i<nv;i++) 698 if (sfvleaves[i]) 699 vmarks[cum++] = i; 700 701 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 702 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 703 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 704 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 705 } 706 707 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 708 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 709 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 710 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 711 712 /* Zero rows of lGt corresponding to identified corners 713 and interior nodal dofs */ 714 cum = 0; 715 for (i=0;i<nv;i++) { 716 if (sfvleaves[i]) { 717 vmarks[cum++] = i; 718 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 719 } 720 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 721 } 722 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 723 if (print) { 724 IS tbz; 725 726 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 727 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 728 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 729 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 730 } 731 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 732 ierr = PetscFree(vmarks);CHKERRQ(ierr); 733 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 734 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 735 736 /* Recompute G */ 737 ierr = MatDestroy(&lG);CHKERRQ(ierr); 738 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 739 if (print) { 740 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 741 ierr = MatView(lG,NULL);CHKERRQ(ierr); 742 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 743 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 744 } 745 746 /* Get primal dofs (if any) */ 747 cum = 0; 748 for (i=0;i<ne;i++) { 749 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 750 } 751 if (fl2g) { 752 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 753 } 754 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 755 if (print) { 756 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 757 ierr = ISView(primals,NULL);CHKERRQ(ierr); 758 } 759 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 760 /* TODO: what if the user passed in some of them ? */ 761 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 762 ierr = ISDestroy(&primals);CHKERRQ(ierr); 763 764 /* Compute edge connectivity */ 765 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 766 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 767 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 768 if (fl2g) { 769 PetscBT btf; 770 PetscInt *iia,*jja,*iiu,*jju; 771 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 772 773 /* create CSR for all local dofs */ 774 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 775 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 776 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 777 iiu = pcbddc->mat_graph->xadj; 778 jju = pcbddc->mat_graph->adjncy; 779 } else if (pcbddc->use_local_adj) { 780 rest = PETSC_TRUE; 781 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 782 } else { 783 free = PETSC_TRUE; 784 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 785 iiu[0] = 0; 786 for (i=0;i<n;i++) { 787 iiu[i+1] = i+1; 788 jju[i] = -1; 789 } 790 } 791 792 /* import sizes of CSR */ 793 iia[0] = 0; 794 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 795 796 /* overwrite entries corresponding to the Nedelec field */ 797 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 798 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 799 for (i=0;i<ne;i++) { 800 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 801 iia[idxs[i]+1] = ii[i+1]-ii[i]; 802 } 803 804 /* iia in CSR */ 805 for (i=0;i<n;i++) iia[i+1] += iia[i]; 806 807 /* jja in CSR */ 808 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 809 for (i=0;i<n;i++) 810 if (!PetscBTLookup(btf,i)) 811 for (j=0;j<iiu[i+1]-iiu[i];j++) 812 jja[iia[i]+j] = jju[iiu[i]+j]; 813 814 /* map edge dofs connectivity */ 815 if (jj) { 816 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 817 for (i=0;i<ne;i++) { 818 PetscInt e = idxs[i]; 819 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 820 } 821 } 822 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 823 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 824 if (rest) { 825 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 826 } 827 if (free) { 828 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 829 } 830 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 831 } else { 832 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 833 } 834 835 /* Analyze interface for edge dofs */ 836 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 837 pcbddc->mat_graph->twodim = PETSC_FALSE; 838 839 /* Get coarse edges in the edge space */ 840 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 841 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 842 843 if (fl2g) { 844 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 845 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 846 for (i=0;i<nee;i++) { 847 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 848 } 849 } else { 850 eedges = alleedges; 851 primals = allprimals; 852 } 853 854 /* Mark fine edge dofs with their coarse edge id */ 855 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 856 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 857 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 858 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 859 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 860 if (print) { 861 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 862 ierr = ISView(primals,NULL);CHKERRQ(ierr); 863 } 864 865 maxsize = 0; 866 for (i=0;i<nee;i++) { 867 PetscInt size,mark = i+1; 868 869 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 870 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 871 for (j=0;j<size;j++) marks[idxs[j]] = mark; 872 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 873 maxsize = PetscMax(maxsize,size); 874 } 875 876 /* Find coarse edge endpoints */ 877 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 878 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 879 for (i=0;i<nee;i++) { 880 PetscInt mark = i+1,size; 881 882 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 883 if (!size && nedfieldlocal) continue; 884 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 885 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 886 if (print) { 887 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 888 ISView(eedges[i],NULL); 889 } 890 for (j=0;j<size;j++) { 891 PetscInt k, ee = idxs[j]; 892 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 893 for (k=ii[ee];k<ii[ee+1];k++) { 894 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 895 if (PetscBTLookup(btv,jj[k])) { 896 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 897 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 898 PetscInt k2; 899 PetscBool corner = PETSC_FALSE; 900 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 901 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 902 /* it's a corner if either is connected with an edge dof belonging to a different cc or 903 if the edge dof lie on the natural part of the boundary */ 904 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 905 corner = PETSC_TRUE; 906 break; 907 } 908 } 909 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 910 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 911 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 912 } else { 913 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 914 } 915 } 916 } 917 } 918 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 919 } 920 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 921 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 922 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 923 924 /* Reset marked primal dofs */ 925 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 926 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 927 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 928 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 929 930 /* Now use the initial lG */ 931 ierr = MatDestroy(&lG);CHKERRQ(ierr); 932 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 933 lG = lGinit; 934 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 935 936 /* Compute extended cols indices */ 937 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 938 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 939 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 940 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 941 i *= maxsize; 942 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 943 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 944 eerr = PETSC_FALSE; 945 for (i=0;i<nee;i++) { 946 PetscInt size,found = 0; 947 948 cum = 0; 949 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 950 if (!size && nedfieldlocal) continue; 951 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 952 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 953 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 954 for (j=0;j<size;j++) { 955 PetscInt k,ee = idxs[j]; 956 for (k=ii[ee];k<ii[ee+1];k++) { 957 PetscInt vv = jj[k]; 958 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 959 else if (!PetscBTLookupSet(btvc,vv)) found++; 960 } 961 } 962 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 963 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 964 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 965 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 966 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 967 /* it may happen that endpoints are not defined at this point 968 if it is the case, mark this edge for a second pass */ 969 if (cum != size -1 || found != 2) { 970 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 971 if (print) { 972 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 973 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 974 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 975 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 976 } 977 eerr = PETSC_TRUE; 978 } 979 } 980 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 981 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 982 if (done) { 983 PetscInt *newprimals; 984 985 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 986 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 987 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 988 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 989 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 990 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 991 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 992 for (i=0;i<nee;i++) { 993 PetscBool has_candidates = PETSC_FALSE; 994 if (PetscBTLookup(bter,i)) { 995 PetscInt size,mark = i+1; 996 997 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 998 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 999 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1000 for (j=0;j<size;j++) { 1001 PetscInt k,ee = idxs[j]; 1002 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 1003 for (k=ii[ee];k<ii[ee+1];k++) { 1004 /* set all candidates located on the edge as corners */ 1005 if (PetscBTLookup(btvcand,jj[k])) { 1006 PetscInt k2,vv = jj[k]; 1007 has_candidates = PETSC_TRUE; 1008 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 1009 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 1010 /* set all edge dofs connected to candidate as primals */ 1011 for (k2=iit[vv];k2<iit[vv+1];k2++) { 1012 if (marks[jjt[k2]] == mark) { 1013 PetscInt k3,ee2 = jjt[k2]; 1014 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 1015 newprimals[cum++] = ee2; 1016 /* finally set the new corners */ 1017 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 1018 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1019 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1020 } 1021 } 1022 } 1023 } else { 1024 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1025 } 1026 } 1027 } 1028 if (!has_candidates) { /* circular edge */ 1029 PetscInt k, ee = idxs[0],*tmarks; 1030 1031 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1032 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1033 for (k=ii[ee];k<ii[ee+1];k++) { 1034 PetscInt k2; 1035 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1036 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1037 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1038 } 1039 for (j=0;j<size;j++) { 1040 if (tmarks[idxs[j]] > 1) { 1041 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1042 newprimals[cum++] = idxs[j]; 1043 } 1044 } 1045 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1046 } 1047 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1048 } 1049 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1050 } 1051 ierr = PetscFree(extcols);CHKERRQ(ierr); 1052 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1053 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1054 if (fl2g) { 1055 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1056 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1057 for (i=0;i<nee;i++) { 1058 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1059 } 1060 ierr = PetscFree(eedges);CHKERRQ(ierr); 1061 } 1062 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1063 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1064 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1065 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1066 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1067 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1068 pcbddc->mat_graph->twodim = PETSC_FALSE; 1069 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1070 if (fl2g) { 1071 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1072 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1073 for (i=0;i<nee;i++) { 1074 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1075 } 1076 } else { 1077 eedges = alleedges; 1078 primals = allprimals; 1079 } 1080 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1081 1082 /* Mark again */ 1083 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1084 for (i=0;i<nee;i++) { 1085 PetscInt size,mark = i+1; 1086 1087 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1088 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1089 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1090 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1091 } 1092 if (print) { 1093 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1094 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1095 } 1096 1097 /* Recompute extended cols */ 1098 eerr = PETSC_FALSE; 1099 for (i=0;i<nee;i++) { 1100 PetscInt size; 1101 1102 cum = 0; 1103 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1104 if (!size && nedfieldlocal) continue; 1105 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1106 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1107 for (j=0;j<size;j++) { 1108 PetscInt k,ee = idxs[j]; 1109 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1110 } 1111 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1112 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1113 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1114 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1115 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1116 if (cum != size -1) { 1117 if (print) { 1118 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1119 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1120 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1121 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1122 } 1123 eerr = PETSC_TRUE; 1124 } 1125 } 1126 } 1127 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1128 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1129 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1130 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1131 /* an error should not occur at this point */ 1132 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1133 1134 /* Check the number of endpoints */ 1135 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1136 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1137 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1138 for (i=0;i<nee;i++) { 1139 PetscInt size, found = 0, gc[2]; 1140 1141 /* init with defaults */ 1142 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1143 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1144 if (!size && nedfieldlocal) continue; 1145 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1146 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1147 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1148 for (j=0;j<size;j++) { 1149 PetscInt k,ee = idxs[j]; 1150 for (k=ii[ee];k<ii[ee+1];k++) { 1151 PetscInt vv = jj[k]; 1152 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1153 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1154 corners[i*2+found++] = vv; 1155 } 1156 } 1157 } 1158 if (found != 2) { 1159 PetscInt e; 1160 if (fl2g) { 1161 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1162 } else { 1163 e = idxs[0]; 1164 } 1165 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1166 } 1167 1168 /* get primal dof index on this coarse edge */ 1169 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1170 if (gc[0] > gc[1]) { 1171 PetscInt swap = corners[2*i]; 1172 corners[2*i] = corners[2*i+1]; 1173 corners[2*i+1] = swap; 1174 } 1175 cedges[i] = idxs[size-1]; 1176 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1177 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1178 } 1179 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1180 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1181 1182 #if defined(PETSC_USE_DEBUG) 1183 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1184 not interfere with neighbouring coarse edges */ 1185 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1186 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1187 for (i=0;i<nv;i++) { 1188 PetscInt emax = 0,eemax = 0; 1189 1190 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1191 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1192 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1193 for (j=1;j<nee+1;j++) { 1194 if (emax < emarks[j]) { 1195 emax = emarks[j]; 1196 eemax = j; 1197 } 1198 } 1199 /* not relevant for edges */ 1200 if (!eemax) continue; 1201 1202 for (j=ii[i];j<ii[i+1];j++) { 1203 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1204 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1205 } 1206 } 1207 } 1208 ierr = PetscFree(emarks);CHKERRQ(ierr); 1209 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1210 #endif 1211 1212 /* Compute extended rows indices for edge blocks of the change of basis */ 1213 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1214 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1215 extmem *= maxsize; 1216 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1217 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1218 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1219 for (i=0;i<nv;i++) { 1220 PetscInt mark = 0,size,start; 1221 1222 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1223 for (j=ii[i];j<ii[i+1];j++) 1224 if (marks[jj[j]] && !mark) 1225 mark = marks[jj[j]]; 1226 1227 /* not relevant */ 1228 if (!mark) continue; 1229 1230 /* import extended row */ 1231 mark--; 1232 start = mark*extmem+extrowcum[mark]; 1233 size = ii[i+1]-ii[i]; 1234 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1235 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1236 extrowcum[mark] += size; 1237 } 1238 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1239 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1240 ierr = PetscFree(marks);CHKERRQ(ierr); 1241 1242 /* Compress extrows */ 1243 cum = 0; 1244 for (i=0;i<nee;i++) { 1245 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1246 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1247 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1248 cum = PetscMax(cum,size); 1249 } 1250 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1251 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1252 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1253 1254 /* Workspace for lapack inner calls and VecSetValues */ 1255 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1256 1257 /* Create change of basis matrix (preallocation can be improved) */ 1258 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1259 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1260 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1261 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1262 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1263 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1264 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1265 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1266 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1267 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1268 1269 /* Defaults to identity */ 1270 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1271 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1272 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1273 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1274 1275 /* Create discrete gradient for the coarser level if needed */ 1276 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1277 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1278 if (pcbddc->current_level < pcbddc->max_levels) { 1279 ISLocalToGlobalMapping cel2g,cvl2g; 1280 IS wis,gwis; 1281 PetscInt cnv,cne; 1282 1283 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1284 if (fl2g) { 1285 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1286 } else { 1287 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1288 pcbddc->nedclocal = wis; 1289 } 1290 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1291 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1292 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1293 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1294 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1295 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1296 1297 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1298 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1299 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1300 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1301 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1302 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1303 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1304 1305 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1306 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1307 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1308 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1309 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1310 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1311 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1312 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1313 } 1314 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1315 1316 #if defined(PRINT_GDET) 1317 inc = 0; 1318 lev = pcbddc->current_level; 1319 #endif 1320 1321 /* Insert values in the change of basis matrix */ 1322 for (i=0;i<nee;i++) { 1323 Mat Gins = NULL, GKins = NULL; 1324 IS cornersis = NULL; 1325 PetscScalar cvals[2]; 1326 1327 if (pcbddc->nedcG) { 1328 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1329 } 1330 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1331 if (Gins && GKins) { 1332 PetscScalar *data; 1333 const PetscInt *rows,*cols; 1334 PetscInt nrh,nch,nrc,ncc; 1335 1336 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1337 /* H1 */ 1338 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1339 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1340 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1341 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1342 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1343 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1344 /* complement */ 1345 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1346 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1347 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i); 1348 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc); 1349 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1350 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1351 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1352 1353 /* coarse discrete gradient */ 1354 if (pcbddc->nedcG) { 1355 PetscInt cols[2]; 1356 1357 cols[0] = 2*i; 1358 cols[1] = 2*i+1; 1359 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1360 } 1361 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1362 } 1363 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1364 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1365 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1366 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1367 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1368 } 1369 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1370 1371 /* Start assembling */ 1372 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1373 if (pcbddc->nedcG) { 1374 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1375 } 1376 1377 /* Free */ 1378 if (fl2g) { 1379 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1380 for (i=0;i<nee;i++) { 1381 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1382 } 1383 ierr = PetscFree(eedges);CHKERRQ(ierr); 1384 } 1385 1386 /* hack mat_graph with primal dofs on the coarse edges */ 1387 { 1388 PCBDDCGraph graph = pcbddc->mat_graph; 1389 PetscInt *oqueue = graph->queue; 1390 PetscInt *ocptr = graph->cptr; 1391 PetscInt ncc,*idxs; 1392 1393 /* find first primal edge */ 1394 if (pcbddc->nedclocal) { 1395 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1396 } else { 1397 if (fl2g) { 1398 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1399 } 1400 idxs = cedges; 1401 } 1402 cum = 0; 1403 while (cum < nee && cedges[cum] < 0) cum++; 1404 1405 /* adapt connected components */ 1406 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1407 graph->cptr[0] = 0; 1408 for (i=0,ncc=0;i<graph->ncc;i++) { 1409 PetscInt lc = ocptr[i+1]-ocptr[i]; 1410 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1411 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1412 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1413 ncc++; 1414 lc--; 1415 cum++; 1416 while (cum < nee && cedges[cum] < 0) cum++; 1417 } 1418 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1419 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1420 ncc++; 1421 } 1422 graph->ncc = ncc; 1423 if (pcbddc->nedclocal) { 1424 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1425 } 1426 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1427 } 1428 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1429 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1430 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1431 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1432 1433 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1434 ierr = PetscFree(extrow);CHKERRQ(ierr); 1435 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1436 ierr = PetscFree(corners);CHKERRQ(ierr); 1437 ierr = PetscFree(cedges);CHKERRQ(ierr); 1438 ierr = PetscFree(extrows);CHKERRQ(ierr); 1439 ierr = PetscFree(extcols);CHKERRQ(ierr); 1440 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1441 1442 /* Complete assembling */ 1443 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1444 if (pcbddc->nedcG) { 1445 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1446 #if 0 1447 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1448 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1449 #endif 1450 } 1451 1452 /* set change of basis */ 1453 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1454 ierr = MatDestroy(&T);CHKERRQ(ierr); 1455 1456 PetscFunctionReturn(0); 1457 } 1458 1459 /* the near-null space of BDDC carries information on quadrature weights, 1460 and these can be collinear -> so cheat with MatNullSpaceCreate 1461 and create a suitable set of basis vectors first */ 1462 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1463 { 1464 PetscErrorCode ierr; 1465 PetscInt i; 1466 1467 PetscFunctionBegin; 1468 for (i=0;i<nvecs;i++) { 1469 PetscInt first,last; 1470 1471 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1472 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1473 if (i>=first && i < last) { 1474 PetscScalar *data; 1475 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1476 if (!has_const) { 1477 data[i-first] = 1.; 1478 } else { 1479 data[2*i-first] = 1./PetscSqrtReal(2.); 1480 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1481 } 1482 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1483 } 1484 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1485 } 1486 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1487 for (i=0;i<nvecs;i++) { /* reset vectors */ 1488 PetscInt first,last; 1489 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1490 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1491 if (i>=first && i < last) { 1492 PetscScalar *data; 1493 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1494 if (!has_const) { 1495 data[i-first] = 0.; 1496 } else { 1497 data[2*i-first] = 0.; 1498 data[2*i-first+1] = 0.; 1499 } 1500 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1501 } 1502 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1503 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1504 } 1505 PetscFunctionReturn(0); 1506 } 1507 1508 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1509 { 1510 Mat loc_divudotp; 1511 Vec p,v,vins,quad_vec,*quad_vecs; 1512 ISLocalToGlobalMapping map; 1513 IS *faces,*edges; 1514 PetscScalar *vals; 1515 const PetscScalar *array; 1516 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1517 PetscMPIInt rank; 1518 PetscErrorCode ierr; 1519 1520 PetscFunctionBegin; 1521 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1522 if (graph->twodim) { 1523 lmaxneighs = 2; 1524 } else { 1525 lmaxneighs = 1; 1526 for (i=0;i<ne;i++) { 1527 const PetscInt *idxs; 1528 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1529 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1530 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1531 } 1532 lmaxneighs++; /* graph count does not include self */ 1533 } 1534 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1535 maxsize = 0; 1536 for (i=0;i<ne;i++) { 1537 PetscInt nn; 1538 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1539 maxsize = PetscMax(maxsize,nn); 1540 } 1541 for (i=0;i<nf;i++) { 1542 PetscInt nn; 1543 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1544 maxsize = PetscMax(maxsize,nn); 1545 } 1546 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1547 /* create vectors to hold quadrature weights */ 1548 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1549 if (!transpose) { 1550 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1551 } else { 1552 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1553 } 1554 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1555 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1556 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1557 for (i=0;i<maxneighs;i++) { 1558 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1559 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1560 } 1561 1562 /* compute local quad vec */ 1563 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1564 if (!transpose) { 1565 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1566 } else { 1567 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1568 } 1569 ierr = VecSet(p,1.);CHKERRQ(ierr); 1570 if (!transpose) { 1571 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1572 } else { 1573 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1574 } 1575 if (vl2l) { 1576 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1577 } else { 1578 vins = v; 1579 } 1580 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1581 ierr = VecDestroy(&p);CHKERRQ(ierr); 1582 1583 /* insert in global quadrature vecs */ 1584 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1585 for (i=0;i<nf;i++) { 1586 const PetscInt *idxs; 1587 PetscInt idx,nn,j; 1588 1589 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1590 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1591 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1592 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1593 idx = -(idx+1); 1594 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1595 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1596 } 1597 for (i=0;i<ne;i++) { 1598 const PetscInt *idxs; 1599 PetscInt idx,nn,j; 1600 1601 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1602 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1603 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1604 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1605 idx = -(idx+1); 1606 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1607 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1608 } 1609 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1610 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1611 if (vl2l) { 1612 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1613 } 1614 ierr = VecDestroy(&v);CHKERRQ(ierr); 1615 ierr = PetscFree(vals);CHKERRQ(ierr); 1616 1617 /* assemble near null space */ 1618 for (i=0;i<maxneighs;i++) { 1619 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1620 } 1621 for (i=0;i<maxneighs;i++) { 1622 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1623 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1624 } 1625 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1626 PetscFunctionReturn(0); 1627 } 1628 1629 1630 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1631 { 1632 PetscErrorCode ierr; 1633 Vec local,global; 1634 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1635 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1636 PetscBool monolithic = PETSC_FALSE; 1637 1638 PetscFunctionBegin; 1639 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1640 ierr = PetscOptionsBool("-pc_bddc_monolithic","Don't split dofs by block size",NULL,monolithic,&monolithic,NULL);CHKERRQ(ierr); 1641 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1642 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1643 /* need to convert from global to local topology information and remove references to information in global ordering */ 1644 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1645 if (pcbddc->user_provided_isfordofs) { 1646 if (pcbddc->n_ISForDofs) { 1647 PetscInt i; 1648 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1649 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1650 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1651 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1652 } 1653 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1654 pcbddc->n_ISForDofs = 0; 1655 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1656 } 1657 } else { 1658 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1659 PetscInt i, n = matis->A->rmap->n; 1660 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1661 if (i > 1 && !monolithic) { 1662 pcbddc->n_ISForDofsLocal = i; 1663 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1664 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1665 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1666 } 1667 } 1668 } else { 1669 PetscInt i; 1670 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1671 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1672 } 1673 } 1674 } 1675 1676 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1677 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1678 } else if (pcbddc->DirichletBoundariesLocal) { 1679 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1680 } 1681 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1682 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1683 } else if (pcbddc->NeumannBoundariesLocal) { 1684 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1685 } 1686 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1687 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1688 } 1689 ierr = VecDestroy(&global);CHKERRQ(ierr); 1690 ierr = VecDestroy(&local);CHKERRQ(ierr); 1691 1692 PetscFunctionReturn(0); 1693 } 1694 1695 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1696 { 1697 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1698 PetscErrorCode ierr; 1699 IS nis; 1700 const PetscInt *idxs; 1701 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1702 PetscBool *ld; 1703 1704 PetscFunctionBegin; 1705 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1706 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 1707 if (mop == MPI_LAND) { 1708 /* init rootdata with true */ 1709 ld = (PetscBool*) matis->sf_rootdata; 1710 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1711 } else { 1712 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1713 } 1714 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1715 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1716 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1717 ld = (PetscBool*) matis->sf_leafdata; 1718 for (i=0;i<nd;i++) 1719 if (-1 < idxs[i] && idxs[i] < n) 1720 ld[idxs[i]] = PETSC_TRUE; 1721 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1722 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1723 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1724 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1725 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1726 if (mop == MPI_LAND) { 1727 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1728 } else { 1729 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1730 } 1731 for (i=0,nnd=0;i<n;i++) 1732 if (ld[i]) 1733 nidxs[nnd++] = i; 1734 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1735 ierr = ISDestroy(is);CHKERRQ(ierr); 1736 *is = nis; 1737 PetscFunctionReturn(0); 1738 } 1739 1740 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1741 { 1742 PC_IS *pcis = (PC_IS*)(pc->data); 1743 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1744 PetscErrorCode ierr; 1745 1746 PetscFunctionBegin; 1747 if (!pcbddc->benign_have_null) { 1748 PetscFunctionReturn(0); 1749 } 1750 if (pcbddc->ChangeOfBasisMatrix) { 1751 Vec swap; 1752 1753 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1754 swap = pcbddc->work_change; 1755 pcbddc->work_change = r; 1756 r = swap; 1757 } 1758 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1759 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1760 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1761 ierr = VecSet(z,0.);CHKERRQ(ierr); 1762 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1763 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1764 if (pcbddc->ChangeOfBasisMatrix) { 1765 pcbddc->work_change = r; 1766 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1767 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1768 } 1769 PetscFunctionReturn(0); 1770 } 1771 1772 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1773 { 1774 PCBDDCBenignMatMult_ctx ctx; 1775 PetscErrorCode ierr; 1776 PetscBool apply_right,apply_left,reset_x; 1777 1778 PetscFunctionBegin; 1779 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1780 if (transpose) { 1781 apply_right = ctx->apply_left; 1782 apply_left = ctx->apply_right; 1783 } else { 1784 apply_right = ctx->apply_right; 1785 apply_left = ctx->apply_left; 1786 } 1787 reset_x = PETSC_FALSE; 1788 if (apply_right) { 1789 const PetscScalar *ax; 1790 PetscInt nl,i; 1791 1792 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1793 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1794 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1795 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1796 for (i=0;i<ctx->benign_n;i++) { 1797 PetscScalar sum,val; 1798 const PetscInt *idxs; 1799 PetscInt nz,j; 1800 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1801 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1802 sum = 0.; 1803 if (ctx->apply_p0) { 1804 val = ctx->work[idxs[nz-1]]; 1805 for (j=0;j<nz-1;j++) { 1806 sum += ctx->work[idxs[j]]; 1807 ctx->work[idxs[j]] += val; 1808 } 1809 } else { 1810 for (j=0;j<nz-1;j++) { 1811 sum += ctx->work[idxs[j]]; 1812 } 1813 } 1814 ctx->work[idxs[nz-1]] -= sum; 1815 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1816 } 1817 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1818 reset_x = PETSC_TRUE; 1819 } 1820 if (transpose) { 1821 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1822 } else { 1823 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1824 } 1825 if (reset_x) { 1826 ierr = VecResetArray(x);CHKERRQ(ierr); 1827 } 1828 if (apply_left) { 1829 PetscScalar *ay; 1830 PetscInt i; 1831 1832 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1833 for (i=0;i<ctx->benign_n;i++) { 1834 PetscScalar sum,val; 1835 const PetscInt *idxs; 1836 PetscInt nz,j; 1837 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1838 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1839 val = -ay[idxs[nz-1]]; 1840 if (ctx->apply_p0) { 1841 sum = 0.; 1842 for (j=0;j<nz-1;j++) { 1843 sum += ay[idxs[j]]; 1844 ay[idxs[j]] += val; 1845 } 1846 ay[idxs[nz-1]] += sum; 1847 } else { 1848 for (j=0;j<nz-1;j++) { 1849 ay[idxs[j]] += val; 1850 } 1851 ay[idxs[nz-1]] = 0.; 1852 } 1853 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1854 } 1855 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1856 } 1857 PetscFunctionReturn(0); 1858 } 1859 1860 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1861 { 1862 PetscErrorCode ierr; 1863 1864 PetscFunctionBegin; 1865 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1866 PetscFunctionReturn(0); 1867 } 1868 1869 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1870 { 1871 PetscErrorCode ierr; 1872 1873 PetscFunctionBegin; 1874 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1875 PetscFunctionReturn(0); 1876 } 1877 1878 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1879 { 1880 PC_IS *pcis = (PC_IS*)pc->data; 1881 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1882 PCBDDCBenignMatMult_ctx ctx; 1883 PetscErrorCode ierr; 1884 1885 PetscFunctionBegin; 1886 if (!restore) { 1887 Mat A_IB,A_BI; 1888 PetscScalar *work; 1889 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1890 1891 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1892 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1893 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1894 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1895 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1896 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1897 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1898 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1899 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1900 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1901 ctx->apply_left = PETSC_TRUE; 1902 ctx->apply_right = PETSC_FALSE; 1903 ctx->apply_p0 = PETSC_FALSE; 1904 ctx->benign_n = pcbddc->benign_n; 1905 if (reuse) { 1906 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1907 ctx->free = PETSC_FALSE; 1908 } else { /* TODO: could be optimized for successive solves */ 1909 ISLocalToGlobalMapping N_to_D; 1910 PetscInt i; 1911 1912 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1913 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1914 for (i=0;i<pcbddc->benign_n;i++) { 1915 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1916 } 1917 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1918 ctx->free = PETSC_TRUE; 1919 } 1920 ctx->A = pcis->A_IB; 1921 ctx->work = work; 1922 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1923 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1924 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1925 pcis->A_IB = A_IB; 1926 1927 /* A_BI as A_IB^T */ 1928 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1929 pcbddc->benign_original_mat = pcis->A_BI; 1930 pcis->A_BI = A_BI; 1931 } else { 1932 if (!pcbddc->benign_original_mat) { 1933 PetscFunctionReturn(0); 1934 } 1935 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1936 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1937 pcis->A_IB = ctx->A; 1938 ctx->A = NULL; 1939 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1940 pcis->A_BI = pcbddc->benign_original_mat; 1941 pcbddc->benign_original_mat = NULL; 1942 if (ctx->free) { 1943 PetscInt i; 1944 for (i=0;i<ctx->benign_n;i++) { 1945 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1946 } 1947 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1948 } 1949 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1950 ierr = PetscFree(ctx);CHKERRQ(ierr); 1951 } 1952 PetscFunctionReturn(0); 1953 } 1954 1955 /* used just in bddc debug mode */ 1956 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1957 { 1958 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1959 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1960 Mat An; 1961 PetscErrorCode ierr; 1962 1963 PetscFunctionBegin; 1964 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1965 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1966 if (is1) { 1967 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1968 ierr = MatDestroy(&An);CHKERRQ(ierr); 1969 } else { 1970 *B = An; 1971 } 1972 PetscFunctionReturn(0); 1973 } 1974 1975 /* TODO: add reuse flag */ 1976 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1977 { 1978 Mat Bt; 1979 PetscScalar *a,*bdata; 1980 const PetscInt *ii,*ij; 1981 PetscInt m,n,i,nnz,*bii,*bij; 1982 PetscBool flg_row; 1983 PetscErrorCode ierr; 1984 1985 PetscFunctionBegin; 1986 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 1987 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1988 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 1989 nnz = n; 1990 for (i=0;i<ii[n];i++) { 1991 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 1992 } 1993 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 1994 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 1995 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 1996 nnz = 0; 1997 bii[0] = 0; 1998 for (i=0;i<n;i++) { 1999 PetscInt j; 2000 for (j=ii[i];j<ii[i+1];j++) { 2001 PetscScalar entry = a[j]; 2002 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 2003 bij[nnz] = ij[j]; 2004 bdata[nnz] = entry; 2005 nnz++; 2006 } 2007 } 2008 bii[i+1] = nnz; 2009 } 2010 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2011 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2012 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2013 { 2014 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2015 b->free_a = PETSC_TRUE; 2016 b->free_ij = PETSC_TRUE; 2017 } 2018 *B = Bt; 2019 PetscFunctionReturn(0); 2020 } 2021 2022 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 2023 { 2024 Mat B; 2025 IS is_dummy,*cc_n; 2026 ISLocalToGlobalMapping l2gmap_dummy; 2027 PCBDDCGraph graph; 2028 PetscInt i,n; 2029 PetscInt *xadj,*adjncy; 2030 PetscInt *xadj_filtered,*adjncy_filtered; 2031 PetscBool flg_row,isseqaij; 2032 PetscErrorCode ierr; 2033 2034 PetscFunctionBegin; 2035 if (!A->rmap->N || !A->cmap->N) { 2036 *ncc = 0; 2037 *cc = NULL; 2038 PetscFunctionReturn(0); 2039 } 2040 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2041 if (!isseqaij && filter) { 2042 PetscBool isseqdense; 2043 2044 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2045 if (!isseqdense) { 2046 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2047 } else { /* TODO: rectangular case and LDA */ 2048 PetscScalar *array; 2049 PetscReal chop=1.e-6; 2050 2051 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2052 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2053 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2054 for (i=0;i<n;i++) { 2055 PetscInt j; 2056 for (j=i+1;j<n;j++) { 2057 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2058 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2059 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2060 } 2061 } 2062 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2063 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2064 } 2065 } else { 2066 B = A; 2067 } 2068 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2069 2070 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2071 if (filter) { 2072 PetscScalar *data; 2073 PetscInt j,cum; 2074 2075 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2076 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2077 cum = 0; 2078 for (i=0;i<n;i++) { 2079 PetscInt t; 2080 2081 for (j=xadj[i];j<xadj[i+1];j++) { 2082 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2083 continue; 2084 } 2085 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2086 } 2087 t = xadj_filtered[i]; 2088 xadj_filtered[i] = cum; 2089 cum += t; 2090 } 2091 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2092 } else { 2093 xadj_filtered = NULL; 2094 adjncy_filtered = NULL; 2095 } 2096 2097 /* compute local connected components using PCBDDCGraph */ 2098 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2099 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2100 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2101 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2102 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2103 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2104 if (xadj_filtered) { 2105 graph->xadj = xadj_filtered; 2106 graph->adjncy = adjncy_filtered; 2107 } else { 2108 graph->xadj = xadj; 2109 graph->adjncy = adjncy; 2110 } 2111 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2112 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2113 /* partial clean up */ 2114 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2115 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2116 if (A != B) { 2117 ierr = MatDestroy(&B);CHKERRQ(ierr); 2118 } 2119 2120 /* get back data */ 2121 if (ncc) *ncc = graph->ncc; 2122 if (cc) { 2123 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2124 for (i=0;i<graph->ncc;i++) { 2125 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2126 } 2127 *cc = cc_n; 2128 } 2129 /* clean up graph */ 2130 graph->xadj = 0; 2131 graph->adjncy = 0; 2132 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2133 PetscFunctionReturn(0); 2134 } 2135 2136 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2137 { 2138 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2139 PC_IS* pcis = (PC_IS*)(pc->data); 2140 IS dirIS = NULL; 2141 PetscInt i; 2142 PetscErrorCode ierr; 2143 2144 PetscFunctionBegin; 2145 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2146 if (zerodiag) { 2147 Mat A; 2148 Vec vec3_N; 2149 PetscScalar *vals; 2150 const PetscInt *idxs; 2151 PetscInt nz,*count; 2152 2153 /* p0 */ 2154 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2155 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2156 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2157 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2158 for (i=0;i<nz;i++) vals[i] = 1.; 2159 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2160 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2161 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2162 /* v_I */ 2163 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2164 for (i=0;i<nz;i++) vals[i] = 0.; 2165 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2166 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2167 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2168 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2169 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2170 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2171 if (dirIS) { 2172 PetscInt n; 2173 2174 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2175 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2176 for (i=0;i<n;i++) vals[i] = 0.; 2177 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2178 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2179 } 2180 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2181 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2182 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2183 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2184 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2185 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2186 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2187 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2188 ierr = PetscFree(vals);CHKERRQ(ierr); 2189 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2190 2191 /* there should not be any pressure dofs lying on the interface */ 2192 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2193 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2194 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2195 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2196 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2197 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2198 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2199 ierr = PetscFree(count);CHKERRQ(ierr); 2200 } 2201 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2202 2203 /* check PCBDDCBenignGetOrSetP0 */ 2204 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2205 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2206 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2207 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2208 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2209 for (i=0;i<pcbddc->benign_n;i++) { 2210 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2211 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 2212 } 2213 PetscFunctionReturn(0); 2214 } 2215 2216 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2217 { 2218 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2219 IS pressures,zerodiag,zerodiag_save,*zerodiag_subs; 2220 PetscInt nz,n; 2221 PetscInt *interior_dofs,n_interior_dofs,nneu; 2222 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2223 PetscErrorCode ierr; 2224 2225 PetscFunctionBegin; 2226 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2227 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2228 for (n=0;n<pcbddc->benign_n;n++) { 2229 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2230 } 2231 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2232 pcbddc->benign_n = 0; 2233 2234 /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line) 2235 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2236 Checks if all the pressure dofs in each subdomain have a zero diagonal 2237 If not, a change of basis on pressures is not needed 2238 since the local Schur complements are already SPD 2239 */ 2240 has_null_pressures = PETSC_TRUE; 2241 have_null = PETSC_TRUE; 2242 if (pcbddc->n_ISForDofsLocal) { 2243 IS iP = NULL; 2244 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2245 2246 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2247 ierr = PetscOptionsInt("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);CHKERRQ(ierr); 2248 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2249 if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p); 2250 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2251 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2252 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2253 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2254 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2255 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2256 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2257 if (iP) { 2258 IS newpressures; 2259 2260 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2261 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2262 pressures = newpressures; 2263 } 2264 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2265 if (!sorted) { 2266 ierr = ISSort(pressures);CHKERRQ(ierr); 2267 } 2268 } else { 2269 pressures = NULL; 2270 } 2271 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2272 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2273 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2274 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2275 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2276 if (!sorted) { 2277 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2278 } 2279 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2280 zerodiag_save = zerodiag; 2281 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2282 if (!nz) { 2283 if (n) have_null = PETSC_FALSE; 2284 has_null_pressures = PETSC_FALSE; 2285 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2286 } 2287 recompute_zerodiag = PETSC_FALSE; 2288 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2289 zerodiag_subs = NULL; 2290 pcbddc->benign_n = 0; 2291 n_interior_dofs = 0; 2292 interior_dofs = NULL; 2293 nneu = 0; 2294 if (pcbddc->NeumannBoundariesLocal) { 2295 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2296 } 2297 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2298 if (checkb) { /* need to compute interior nodes */ 2299 PetscInt n,i,j; 2300 PetscInt n_neigh,*neigh,*n_shared,**shared; 2301 PetscInt *iwork; 2302 2303 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2304 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2305 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2306 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2307 for (i=1;i<n_neigh;i++) 2308 for (j=0;j<n_shared[i];j++) 2309 iwork[shared[i][j]] += 1; 2310 for (i=0;i<n;i++) 2311 if (!iwork[i]) 2312 interior_dofs[n_interior_dofs++] = i; 2313 ierr = PetscFree(iwork);CHKERRQ(ierr); 2314 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2315 } 2316 if (has_null_pressures) { 2317 IS *subs; 2318 PetscInt nsubs,i,j,nl; 2319 const PetscInt *idxs; 2320 PetscScalar *array; 2321 Vec *work; 2322 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2323 2324 subs = pcbddc->local_subs; 2325 nsubs = pcbddc->n_local_subs; 2326 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2327 if (checkb) { 2328 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2329 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2330 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2331 /* work[0] = 1_p */ 2332 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2333 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2334 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2335 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2336 /* work[0] = 1_v */ 2337 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2338 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2339 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2340 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2341 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2342 } 2343 if (nsubs > 1) { 2344 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2345 for (i=0;i<nsubs;i++) { 2346 ISLocalToGlobalMapping l2g; 2347 IS t_zerodiag_subs; 2348 PetscInt nl; 2349 2350 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2351 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2352 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2353 if (nl) { 2354 PetscBool valid = PETSC_TRUE; 2355 2356 if (checkb) { 2357 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2358 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2359 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2360 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2361 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2362 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2363 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2364 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2365 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2366 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2367 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2368 for (j=0;j<n_interior_dofs;j++) { 2369 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2370 valid = PETSC_FALSE; 2371 break; 2372 } 2373 } 2374 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2375 } 2376 if (valid && nneu) { 2377 const PetscInt *idxs; 2378 PetscInt nzb; 2379 2380 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2381 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2382 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2383 if (nzb) valid = PETSC_FALSE; 2384 } 2385 if (valid && pressures) { 2386 IS t_pressure_subs; 2387 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2388 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2389 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2390 } 2391 if (valid) { 2392 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2393 pcbddc->benign_n++; 2394 } else { 2395 recompute_zerodiag = PETSC_TRUE; 2396 } 2397 } 2398 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2399 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2400 } 2401 } else { /* there's just one subdomain (or zero if they have not been detected */ 2402 PetscBool valid = PETSC_TRUE; 2403 2404 if (nneu) valid = PETSC_FALSE; 2405 if (valid && pressures) { 2406 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2407 } 2408 if (valid && checkb) { 2409 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2410 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2411 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2412 for (j=0;j<n_interior_dofs;j++) { 2413 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2414 valid = PETSC_FALSE; 2415 break; 2416 } 2417 } 2418 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2419 } 2420 if (valid) { 2421 pcbddc->benign_n = 1; 2422 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2423 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2424 zerodiag_subs[0] = zerodiag; 2425 } 2426 } 2427 if (checkb) { 2428 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2429 } 2430 } 2431 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2432 2433 if (!pcbddc->benign_n) { 2434 PetscInt n; 2435 2436 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2437 recompute_zerodiag = PETSC_FALSE; 2438 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2439 if (n) { 2440 has_null_pressures = PETSC_FALSE; 2441 have_null = PETSC_FALSE; 2442 } 2443 } 2444 2445 /* final check for null pressures */ 2446 if (zerodiag && pressures) { 2447 PetscInt nz,np; 2448 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2449 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2450 if (nz != np) have_null = PETSC_FALSE; 2451 } 2452 2453 if (recompute_zerodiag) { 2454 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2455 if (pcbddc->benign_n == 1) { 2456 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2457 zerodiag = zerodiag_subs[0]; 2458 } else { 2459 PetscInt i,nzn,*new_idxs; 2460 2461 nzn = 0; 2462 for (i=0;i<pcbddc->benign_n;i++) { 2463 PetscInt ns; 2464 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2465 nzn += ns; 2466 } 2467 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2468 nzn = 0; 2469 for (i=0;i<pcbddc->benign_n;i++) { 2470 PetscInt ns,*idxs; 2471 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2472 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2473 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2474 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2475 nzn += ns; 2476 } 2477 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2478 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2479 } 2480 have_null = PETSC_FALSE; 2481 } 2482 2483 /* Prepare matrix to compute no-net-flux */ 2484 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2485 Mat A,loc_divudotp; 2486 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2487 IS row,col,isused = NULL; 2488 PetscInt M,N,n,st,n_isused; 2489 2490 if (pressures) { 2491 isused = pressures; 2492 } else { 2493 isused = zerodiag_save; 2494 } 2495 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2496 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2497 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2498 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2499 n_isused = 0; 2500 if (isused) { 2501 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2502 } 2503 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2504 st = st-n_isused; 2505 if (n) { 2506 const PetscInt *gidxs; 2507 2508 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2509 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2510 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2511 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2512 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2513 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2514 } else { 2515 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2516 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2517 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2518 } 2519 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2520 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2521 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2522 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2523 ierr = ISDestroy(&row);CHKERRQ(ierr); 2524 ierr = ISDestroy(&col);CHKERRQ(ierr); 2525 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2526 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2527 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2528 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2529 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2530 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2531 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2532 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2533 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2534 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2535 } 2536 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2537 2538 /* change of basis and p0 dofs */ 2539 if (has_null_pressures) { 2540 IS zerodiagc; 2541 const PetscInt *idxs,*idxsc; 2542 PetscInt i,s,*nnz; 2543 2544 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2545 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2546 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2547 /* local change of basis for pressures */ 2548 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2549 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2550 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2551 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2552 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2553 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2554 for (i=0;i<pcbddc->benign_n;i++) { 2555 PetscInt nzs,j; 2556 2557 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2558 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2559 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2560 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2561 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2562 } 2563 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2564 ierr = PetscFree(nnz);CHKERRQ(ierr); 2565 /* set identity on velocities */ 2566 for (i=0;i<n-nz;i++) { 2567 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2568 } 2569 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2570 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2571 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2572 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2573 /* set change on pressures */ 2574 for (s=0;s<pcbddc->benign_n;s++) { 2575 PetscScalar *array; 2576 PetscInt nzs; 2577 2578 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2579 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2580 for (i=0;i<nzs-1;i++) { 2581 PetscScalar vals[2]; 2582 PetscInt cols[2]; 2583 2584 cols[0] = idxs[i]; 2585 cols[1] = idxs[nzs-1]; 2586 vals[0] = 1.; 2587 vals[1] = 1.; 2588 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2589 } 2590 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2591 for (i=0;i<nzs-1;i++) array[i] = -1.; 2592 array[nzs-1] = 1.; 2593 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2594 /* store local idxs for p0 */ 2595 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2596 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2597 ierr = PetscFree(array);CHKERRQ(ierr); 2598 } 2599 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2600 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2601 /* project if needed */ 2602 if (pcbddc->benign_change_explicit) { 2603 Mat M; 2604 2605 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2606 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2607 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2608 ierr = MatDestroy(&M);CHKERRQ(ierr); 2609 } 2610 /* store global idxs for p0 */ 2611 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2612 } 2613 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2614 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2615 2616 /* determines if the coarse solver will be singular or not */ 2617 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2618 /* determines if the problem has subdomains with 0 pressure block */ 2619 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2620 *zerodiaglocal = zerodiag; 2621 PetscFunctionReturn(0); 2622 } 2623 2624 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2625 { 2626 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2627 PetscScalar *array; 2628 PetscErrorCode ierr; 2629 2630 PetscFunctionBegin; 2631 if (!pcbddc->benign_sf) { 2632 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2633 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2634 } 2635 if (get) { 2636 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2637 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2638 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2639 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2640 } else { 2641 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2642 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2643 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2644 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2645 } 2646 PetscFunctionReturn(0); 2647 } 2648 2649 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2650 { 2651 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2652 PetscErrorCode ierr; 2653 2654 PetscFunctionBegin; 2655 /* TODO: add error checking 2656 - avoid nested pop (or push) calls. 2657 - cannot push before pop. 2658 - cannot call this if pcbddc->local_mat is NULL 2659 */ 2660 if (!pcbddc->benign_n) { 2661 PetscFunctionReturn(0); 2662 } 2663 if (pop) { 2664 if (pcbddc->benign_change_explicit) { 2665 IS is_p0; 2666 MatReuse reuse; 2667 2668 /* extract B_0 */ 2669 reuse = MAT_INITIAL_MATRIX; 2670 if (pcbddc->benign_B0) { 2671 reuse = MAT_REUSE_MATRIX; 2672 } 2673 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2674 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2675 /* remove rows and cols from local problem */ 2676 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2677 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2678 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2679 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2680 } else { 2681 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2682 PetscScalar *vals; 2683 PetscInt i,n,*idxs_ins; 2684 2685 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2686 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2687 if (!pcbddc->benign_B0) { 2688 PetscInt *nnz; 2689 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2690 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2691 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2692 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2693 for (i=0;i<pcbddc->benign_n;i++) { 2694 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2695 nnz[i] = n - nnz[i]; 2696 } 2697 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2698 ierr = PetscFree(nnz);CHKERRQ(ierr); 2699 } 2700 2701 for (i=0;i<pcbddc->benign_n;i++) { 2702 PetscScalar *array; 2703 PetscInt *idxs,j,nz,cum; 2704 2705 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2706 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2707 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2708 for (j=0;j<nz;j++) vals[j] = 1.; 2709 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2710 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2711 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2712 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2713 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2714 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2715 cum = 0; 2716 for (j=0;j<n;j++) { 2717 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2718 vals[cum] = array[j]; 2719 idxs_ins[cum] = j; 2720 cum++; 2721 } 2722 } 2723 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2724 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2725 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2726 } 2727 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2728 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2729 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2730 } 2731 } else { /* push */ 2732 if (pcbddc->benign_change_explicit) { 2733 PetscInt i; 2734 2735 for (i=0;i<pcbddc->benign_n;i++) { 2736 PetscScalar *B0_vals; 2737 PetscInt *B0_cols,B0_ncol; 2738 2739 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2740 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2741 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2742 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2743 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2744 } 2745 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2746 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2747 } else { 2748 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2749 } 2750 } 2751 PetscFunctionReturn(0); 2752 } 2753 2754 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2755 { 2756 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2757 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2758 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2759 PetscBLASInt *B_iwork,*B_ifail; 2760 PetscScalar *work,lwork; 2761 PetscScalar *St,*S,*eigv; 2762 PetscScalar *Sarray,*Starray; 2763 PetscReal *eigs,thresh; 2764 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2765 PetscBool allocated_S_St; 2766 #if defined(PETSC_USE_COMPLEX) 2767 PetscReal *rwork; 2768 #endif 2769 PetscErrorCode ierr; 2770 2771 PetscFunctionBegin; 2772 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2773 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2774 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\nRerun with -sub_schurs_hermitian 1 -sub_schurs_posdef 1 if the problem is SPD",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2775 2776 if (pcbddc->dbg_flag) { 2777 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2778 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2779 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2780 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2781 } 2782 2783 if (pcbddc->dbg_flag) { 2784 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2785 } 2786 2787 /* max size of subsets */ 2788 mss = 0; 2789 for (i=0;i<sub_schurs->n_subs;i++) { 2790 PetscInt subset_size; 2791 2792 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2793 mss = PetscMax(mss,subset_size); 2794 } 2795 2796 /* min/max and threshold */ 2797 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2798 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2799 nmax = PetscMax(nmin,nmax); 2800 allocated_S_St = PETSC_FALSE; 2801 if (nmin) { 2802 allocated_S_St = PETSC_TRUE; 2803 } 2804 2805 /* allocate lapack workspace */ 2806 cum = cum2 = 0; 2807 maxneigs = 0; 2808 for (i=0;i<sub_schurs->n_subs;i++) { 2809 PetscInt n,subset_size; 2810 2811 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2812 n = PetscMin(subset_size,nmax); 2813 cum += subset_size; 2814 cum2 += subset_size*n; 2815 maxneigs = PetscMax(maxneigs,n); 2816 } 2817 if (mss) { 2818 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2819 PetscBLASInt B_itype = 1; 2820 PetscBLASInt B_N = mss; 2821 PetscReal zero = 0.0; 2822 PetscReal eps = 0.0; /* dlamch? */ 2823 2824 B_lwork = -1; 2825 S = NULL; 2826 St = NULL; 2827 eigs = NULL; 2828 eigv = NULL; 2829 B_iwork = NULL; 2830 B_ifail = NULL; 2831 #if defined(PETSC_USE_COMPLEX) 2832 rwork = NULL; 2833 #endif 2834 thresh = 1.0; 2835 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2836 #if defined(PETSC_USE_COMPLEX) 2837 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2838 #else 2839 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2840 #endif 2841 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2842 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2843 } else { 2844 /* TODO */ 2845 } 2846 } else { 2847 lwork = 0; 2848 } 2849 2850 nv = 0; 2851 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2852 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2853 } 2854 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2855 if (allocated_S_St) { 2856 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2857 } 2858 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2859 #if defined(PETSC_USE_COMPLEX) 2860 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2861 #endif 2862 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2863 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2864 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2865 nv+cum,&pcbddc->adaptive_constraints_idxs, 2866 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2867 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2868 2869 maxneigs = 0; 2870 cum = cumarray = 0; 2871 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2872 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2873 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2874 const PetscInt *idxs; 2875 2876 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2877 for (cum=0;cum<nv;cum++) { 2878 pcbddc->adaptive_constraints_n[cum] = 1; 2879 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2880 pcbddc->adaptive_constraints_data[cum] = 1.0; 2881 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2882 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2883 } 2884 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2885 } 2886 2887 if (mss) { /* multilevel */ 2888 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2889 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2890 } 2891 2892 thresh = pcbddc->adaptive_threshold; 2893 for (i=0;i<sub_schurs->n_subs;i++) { 2894 const PetscInt *idxs; 2895 PetscReal upper,lower; 2896 PetscInt j,subset_size,eigs_start = 0; 2897 PetscBLASInt B_N; 2898 PetscBool same_data = PETSC_FALSE; 2899 2900 if (pcbddc->use_deluxe_scaling) { 2901 upper = PETSC_MAX_REAL; 2902 lower = thresh; 2903 } else { 2904 upper = 1./thresh; 2905 lower = 0.; 2906 } 2907 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2908 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2909 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2910 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2911 if (sub_schurs->is_hermitian) { 2912 PetscInt j,k; 2913 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2914 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2915 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2916 } 2917 for (j=0;j<subset_size;j++) { 2918 for (k=j;k<subset_size;k++) { 2919 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2920 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2921 } 2922 } 2923 } else { 2924 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2925 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2926 } 2927 } else { 2928 S = Sarray + cumarray; 2929 St = Starray + cumarray; 2930 } 2931 /* see if we can save some work */ 2932 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2933 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2934 } 2935 2936 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2937 B_neigs = 0; 2938 } else { 2939 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2940 PetscBLASInt B_itype = 1; 2941 PetscBLASInt B_IL, B_IU; 2942 PetscReal eps = -1.0; /* dlamch? */ 2943 PetscInt nmin_s; 2944 PetscBool compute_range = PETSC_FALSE; 2945 2946 if (pcbddc->dbg_flag) { 2947 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]]); 2948 } 2949 2950 compute_range = PETSC_FALSE; 2951 if (thresh > 1.+PETSC_SMALL && !same_data) { 2952 compute_range = PETSC_TRUE; 2953 } 2954 2955 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2956 if (compute_range) { 2957 2958 /* ask for eigenvalues larger than thresh */ 2959 #if defined(PETSC_USE_COMPLEX) 2960 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2961 #else 2962 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2963 #endif 2964 } else if (!same_data) { 2965 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2966 B_IL = 1; 2967 #if defined(PETSC_USE_COMPLEX) 2968 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2969 #else 2970 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2971 #endif 2972 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 2973 PetscInt k; 2974 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2975 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2976 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2977 nmin = nmax; 2978 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2979 for (k=0;k<nmax;k++) { 2980 eigs[k] = 1./PETSC_SMALL; 2981 eigv[k*(subset_size+1)] = 1.0; 2982 } 2983 } 2984 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2985 if (B_ierr) { 2986 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2987 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2988 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2989 } 2990 2991 if (B_neigs > nmax) { 2992 if (pcbddc->dbg_flag) { 2993 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 2994 } 2995 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 2996 B_neigs = nmax; 2997 } 2998 2999 nmin_s = PetscMin(nmin,B_N); 3000 if (B_neigs < nmin_s) { 3001 PetscBLASInt B_neigs2; 3002 3003 if (pcbddc->use_deluxe_scaling) { 3004 B_IL = B_N - nmin_s + 1; 3005 B_IU = B_N - B_neigs; 3006 } else { 3007 B_IL = B_neigs + 1; 3008 B_IU = nmin_s; 3009 } 3010 if (pcbddc->dbg_flag) { 3011 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 3012 } 3013 if (sub_schurs->is_hermitian) { 3014 PetscInt j,k; 3015 for (j=0;j<subset_size;j++) { 3016 for (k=j;k<subset_size;k++) { 3017 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3018 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3019 } 3020 } 3021 } else { 3022 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3023 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3024 } 3025 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3026 #if defined(PETSC_USE_COMPLEX) 3027 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3028 #else 3029 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3030 #endif 3031 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3032 B_neigs += B_neigs2; 3033 } 3034 if (B_ierr) { 3035 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3036 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3037 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3038 } 3039 if (pcbddc->dbg_flag) { 3040 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3041 for (j=0;j<B_neigs;j++) { 3042 if (eigs[j] == 0.0) { 3043 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3044 } else { 3045 if (pcbddc->use_deluxe_scaling) { 3046 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3047 } else { 3048 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3049 } 3050 } 3051 } 3052 } 3053 } else { 3054 /* TODO */ 3055 } 3056 } 3057 /* change the basis back to the original one */ 3058 if (sub_schurs->change) { 3059 Mat change,phi,phit; 3060 3061 if (pcbddc->dbg_flag > 1) { 3062 PetscInt ii; 3063 for (ii=0;ii<B_neigs;ii++) { 3064 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3065 for (j=0;j<B_N;j++) { 3066 #if defined(PETSC_USE_COMPLEX) 3067 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3068 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3069 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3070 #else 3071 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3072 #endif 3073 } 3074 } 3075 } 3076 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3077 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3078 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3079 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3080 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3081 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3082 } 3083 maxneigs = PetscMax(B_neigs,maxneigs); 3084 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3085 if (B_neigs) { 3086 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3087 3088 if (pcbddc->dbg_flag > 1) { 3089 PetscInt ii; 3090 for (ii=0;ii<B_neigs;ii++) { 3091 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3092 for (j=0;j<B_N;j++) { 3093 #if defined(PETSC_USE_COMPLEX) 3094 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3095 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3096 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3097 #else 3098 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3099 #endif 3100 } 3101 } 3102 } 3103 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3104 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3105 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3106 cum++; 3107 } 3108 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3109 /* shift for next computation */ 3110 cumarray += subset_size*subset_size; 3111 } 3112 if (pcbddc->dbg_flag) { 3113 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3114 } 3115 3116 if (mss) { 3117 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3118 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3119 /* destroy matrices (junk) */ 3120 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3121 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3122 } 3123 if (allocated_S_St) { 3124 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3125 } 3126 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3127 #if defined(PETSC_USE_COMPLEX) 3128 ierr = PetscFree(rwork);CHKERRQ(ierr); 3129 #endif 3130 if (pcbddc->dbg_flag) { 3131 PetscInt maxneigs_r; 3132 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3133 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3134 } 3135 PetscFunctionReturn(0); 3136 } 3137 3138 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3139 { 3140 PetscScalar *coarse_submat_vals; 3141 PetscErrorCode ierr; 3142 3143 PetscFunctionBegin; 3144 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3145 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3146 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3147 3148 /* Setup local neumann solver ksp_R */ 3149 /* PCBDDCSetUpLocalScatters should be called first! */ 3150 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3151 3152 /* 3153 Setup local correction and local part of coarse basis. 3154 Gives back the dense local part of the coarse matrix in column major ordering 3155 */ 3156 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3157 3158 /* Compute total number of coarse nodes and setup coarse solver */ 3159 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3160 3161 /* free */ 3162 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3163 PetscFunctionReturn(0); 3164 } 3165 3166 PetscErrorCode PCBDDCResetCustomization(PC pc) 3167 { 3168 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3169 PetscErrorCode ierr; 3170 3171 PetscFunctionBegin; 3172 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3173 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3174 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3175 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3176 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3177 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3178 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3179 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3180 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3181 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3182 PetscFunctionReturn(0); 3183 } 3184 3185 PetscErrorCode PCBDDCResetTopography(PC pc) 3186 { 3187 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3188 PetscInt i; 3189 PetscErrorCode ierr; 3190 3191 PetscFunctionBegin; 3192 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3193 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3194 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3195 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3196 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3197 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3198 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3199 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3200 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3201 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3202 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3203 for (i=0;i<pcbddc->n_local_subs;i++) { 3204 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3205 } 3206 pcbddc->n_local_subs = 0; 3207 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3208 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3209 pcbddc->graphanalyzed = PETSC_FALSE; 3210 pcbddc->recompute_topography = PETSC_TRUE; 3211 PetscFunctionReturn(0); 3212 } 3213 3214 PetscErrorCode PCBDDCResetSolvers(PC pc) 3215 { 3216 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3217 PetscErrorCode ierr; 3218 3219 PetscFunctionBegin; 3220 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3221 if (pcbddc->coarse_phi_B) { 3222 PetscScalar *array; 3223 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3224 ierr = PetscFree(array);CHKERRQ(ierr); 3225 } 3226 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3227 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3228 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3229 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3230 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3231 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3232 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3233 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3234 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3235 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3236 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3237 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3238 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3239 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3240 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3241 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3242 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3243 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3244 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3245 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3246 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3247 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3248 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3249 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3250 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3251 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3252 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3253 if (pcbddc->benign_zerodiag_subs) { 3254 PetscInt i; 3255 for (i=0;i<pcbddc->benign_n;i++) { 3256 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3257 } 3258 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3259 } 3260 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3261 PetscFunctionReturn(0); 3262 } 3263 3264 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3265 { 3266 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3267 PC_IS *pcis = (PC_IS*)pc->data; 3268 VecType impVecType; 3269 PetscInt n_constraints,n_R,old_size; 3270 PetscErrorCode ierr; 3271 3272 PetscFunctionBegin; 3273 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3274 n_R = pcis->n - pcbddc->n_vertices; 3275 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3276 /* local work vectors (try to avoid unneeded work)*/ 3277 /* R nodes */ 3278 old_size = -1; 3279 if (pcbddc->vec1_R) { 3280 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3281 } 3282 if (n_R != old_size) { 3283 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3284 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3285 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3286 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3287 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3288 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3289 } 3290 /* local primal dofs */ 3291 old_size = -1; 3292 if (pcbddc->vec1_P) { 3293 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3294 } 3295 if (pcbddc->local_primal_size != old_size) { 3296 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3297 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3298 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3299 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3300 } 3301 /* local explicit constraints */ 3302 old_size = -1; 3303 if (pcbddc->vec1_C) { 3304 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3305 } 3306 if (n_constraints && n_constraints != old_size) { 3307 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3308 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3309 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3310 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3311 } 3312 PetscFunctionReturn(0); 3313 } 3314 3315 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3316 { 3317 PetscErrorCode ierr; 3318 /* pointers to pcis and pcbddc */ 3319 PC_IS* pcis = (PC_IS*)pc->data; 3320 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3321 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3322 /* submatrices of local problem */ 3323 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3324 /* submatrices of local coarse problem */ 3325 Mat S_VV,S_CV,S_VC,S_CC; 3326 /* working matrices */ 3327 Mat C_CR; 3328 /* additional working stuff */ 3329 PC pc_R; 3330 Mat F,Brhs = NULL; 3331 Vec dummy_vec; 3332 PetscBool isLU,isCHOL,isILU,need_benign_correction,sparserhs; 3333 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3334 PetscScalar *work; 3335 PetscInt *idx_V_B; 3336 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3337 PetscInt i,n_R,n_D,n_B; 3338 3339 /* some shortcuts to scalars */ 3340 PetscScalar one=1.0,m_one=-1.0; 3341 3342 PetscFunctionBegin; 3343 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3344 3345 /* Set Non-overlapping dimensions */ 3346 n_vertices = pcbddc->n_vertices; 3347 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3348 n_B = pcis->n_B; 3349 n_D = pcis->n - n_B; 3350 n_R = pcis->n - n_vertices; 3351 3352 /* vertices in boundary numbering */ 3353 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3354 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3355 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3356 3357 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3358 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3359 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3360 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3361 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3362 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3363 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3364 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3365 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3366 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3367 3368 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3369 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3370 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3371 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3372 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3373 lda_rhs = n_R; 3374 need_benign_correction = PETSC_FALSE; 3375 if (isLU || isILU || isCHOL) { 3376 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3377 } else if (sub_schurs && sub_schurs->reuse_solver) { 3378 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3379 MatFactorType type; 3380 3381 F = reuse_solver->F; 3382 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3383 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3384 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3385 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3386 } else { 3387 F = NULL; 3388 } 3389 3390 /* determine if we can use a sparse right-hand side */ 3391 sparserhs = PETSC_FALSE; 3392 if (F) { 3393 const MatSolverPackage solver; 3394 3395 ierr = MatFactorGetSolverPackage(F,&solver);CHKERRQ(ierr); 3396 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3397 } 3398 3399 /* allocate workspace */ 3400 n = 0; 3401 if (n_constraints) { 3402 n += lda_rhs*n_constraints; 3403 } 3404 if (n_vertices) { 3405 n = PetscMax(2*lda_rhs*n_vertices,n); 3406 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3407 } 3408 if (!pcbddc->symmetric_primal) { 3409 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3410 } 3411 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3412 3413 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3414 dummy_vec = NULL; 3415 if (need_benign_correction && lda_rhs != n_R && F) { 3416 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3417 } 3418 3419 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3420 if (n_constraints) { 3421 Mat M1,M2,M3,C_B; 3422 IS is_aux; 3423 PetscScalar *array,*array2; 3424 3425 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3426 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3427 3428 /* Extract constraints on R nodes: C_{CR} */ 3429 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3430 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3431 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3432 3433 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3434 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3435 if (!sparserhs) { 3436 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3437 for (i=0;i<n_constraints;i++) { 3438 const PetscScalar *row_cmat_values; 3439 const PetscInt *row_cmat_indices; 3440 PetscInt size_of_constraint,j; 3441 3442 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3443 for (j=0;j<size_of_constraint;j++) { 3444 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3445 } 3446 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3447 } 3448 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 3449 } else { 3450 Mat tC_CR; 3451 3452 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3453 if (lda_rhs != n_R) { 3454 PetscScalar *aa; 3455 PetscInt r,*ii,*jj; 3456 PetscBool done; 3457 3458 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3459 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");CHKERRQ(ierr); 3460 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 3461 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 3462 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3463 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");CHKERRQ(ierr); 3464 } else { 3465 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 3466 tC_CR = C_CR; 3467 } 3468 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 3469 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 3470 } 3471 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3472 if (F) { 3473 if (need_benign_correction) { 3474 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3475 3476 /* rhs is already zero on interior dofs, no need to change the rhs */ 3477 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3478 } 3479 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 3480 if (need_benign_correction) { 3481 PetscScalar *marr; 3482 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3483 3484 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3485 if (lda_rhs != n_R) { 3486 for (i=0;i<n_constraints;i++) { 3487 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3488 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3489 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3490 } 3491 } else { 3492 for (i=0;i<n_constraints;i++) { 3493 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3494 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3495 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3496 } 3497 } 3498 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3499 } 3500 } else { 3501 PetscScalar *marr; 3502 3503 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3504 for (i=0;i<n_constraints;i++) { 3505 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3506 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3507 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3508 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3509 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3510 } 3511 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3512 } 3513 if (sparserhs) { 3514 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3515 } 3516 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 3517 if (!pcbddc->switch_static) { 3518 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3519 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3520 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3521 for (i=0;i<n_constraints;i++) { 3522 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3523 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3524 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3525 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3526 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3527 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3528 } 3529 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3530 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3531 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3532 } else { 3533 if (lda_rhs != n_R) { 3534 IS dummy; 3535 3536 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3537 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3538 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3539 } else { 3540 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3541 pcbddc->local_auxmat2 = local_auxmat2_R; 3542 } 3543 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3544 } 3545 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3546 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3547 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3548 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3549 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3550 if (isCHOL) { 3551 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3552 } else { 3553 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3554 } 3555 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3556 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3557 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3558 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3559 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3560 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3561 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3562 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3563 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3564 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3565 } 3566 3567 /* Get submatrices from subdomain matrix */ 3568 if (n_vertices) { 3569 IS is_aux; 3570 PetscBool isseqaij; 3571 3572 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3573 IS tis; 3574 3575 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3576 ierr = ISSort(tis);CHKERRQ(ierr); 3577 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3578 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3579 } else { 3580 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3581 } 3582 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3583 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3584 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3585 if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */ 3586 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3587 } 3588 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3589 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3590 } 3591 3592 /* Matrix of coarse basis functions (local) */ 3593 if (pcbddc->coarse_phi_B) { 3594 PetscInt on_B,on_primal,on_D=n_D; 3595 if (pcbddc->coarse_phi_D) { 3596 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3597 } 3598 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3599 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3600 PetscScalar *marray; 3601 3602 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3603 ierr = PetscFree(marray);CHKERRQ(ierr); 3604 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3605 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3606 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3607 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3608 } 3609 } 3610 3611 if (!pcbddc->coarse_phi_B) { 3612 PetscScalar *marr; 3613 3614 /* memory size */ 3615 n = n_B*pcbddc->local_primal_size; 3616 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 3617 if (!pcbddc->symmetric_primal) n *= 2; 3618 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 3619 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3620 marr += n_B*pcbddc->local_primal_size; 3621 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3622 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3623 marr += n_D*pcbddc->local_primal_size; 3624 } 3625 if (!pcbddc->symmetric_primal) { 3626 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3627 marr += n_B*pcbddc->local_primal_size; 3628 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3629 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3630 } 3631 } else { 3632 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3633 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3634 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3635 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3636 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3637 } 3638 } 3639 } 3640 3641 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3642 p0_lidx_I = NULL; 3643 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3644 const PetscInt *idxs; 3645 3646 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3647 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3648 for (i=0;i<pcbddc->benign_n;i++) { 3649 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3650 } 3651 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3652 } 3653 3654 /* vertices */ 3655 if (n_vertices) { 3656 PetscBool restoreavr = PETSC_FALSE; 3657 3658 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3659 3660 if (n_R) { 3661 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3662 PetscBLASInt B_N,B_one = 1; 3663 PetscScalar *x,*y; 3664 3665 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3666 if (need_benign_correction) { 3667 ISLocalToGlobalMapping RtoN; 3668 IS is_p0; 3669 PetscInt *idxs_p0,n; 3670 3671 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3672 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3673 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3674 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3675 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3676 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3677 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3678 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3679 } 3680 3681 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3682 if (!sparserhs || need_benign_correction) { 3683 if (lda_rhs == n_R) { 3684 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3685 } else { 3686 PetscScalar *av,*array; 3687 const PetscInt *xadj,*adjncy; 3688 PetscInt n; 3689 PetscBool flg_row; 3690 3691 array = work+lda_rhs*n_vertices; 3692 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3693 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3694 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3695 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3696 for (i=0;i<n;i++) { 3697 PetscInt j; 3698 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3699 } 3700 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3701 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3702 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3703 } 3704 if (need_benign_correction) { 3705 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3706 PetscScalar *marr; 3707 3708 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3709 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3710 3711 | 0 0 0 | (V) 3712 L = | 0 0 -1 | (P-p0) 3713 | 0 0 -1 | (p0) 3714 3715 */ 3716 for (i=0;i<reuse_solver->benign_n;i++) { 3717 const PetscScalar *vals; 3718 const PetscInt *idxs,*idxs_zero; 3719 PetscInt n,j,nz; 3720 3721 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3722 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3723 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3724 for (j=0;j<n;j++) { 3725 PetscScalar val = vals[j]; 3726 PetscInt k,col = idxs[j]; 3727 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3728 } 3729 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3730 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3731 } 3732 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3733 } 3734 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 3735 Brhs = A_RV; 3736 } else { 3737 Mat tA_RVT,A_RVT; 3738 3739 if (!pcbddc->symmetric_primal) { 3740 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 3741 } else { 3742 restoreavr = PETSC_TRUE; 3743 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 3744 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 3745 A_RVT = A_VR; 3746 } 3747 if (lda_rhs != n_R) { 3748 PetscScalar *aa; 3749 PetscInt r,*ii,*jj; 3750 PetscBool done; 3751 3752 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3753 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");CHKERRQ(ierr); 3754 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 3755 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 3756 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3757 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");CHKERRQ(ierr); 3758 } else { 3759 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 3760 tA_RVT = A_RVT; 3761 } 3762 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 3763 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 3764 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 3765 } 3766 if (F) { 3767 /* need to correct the rhs */ 3768 if (need_benign_correction) { 3769 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3770 PetscScalar *marr; 3771 3772 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 3773 if (lda_rhs != n_R) { 3774 for (i=0;i<n_vertices;i++) { 3775 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3776 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3777 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3778 } 3779 } else { 3780 for (i=0;i<n_vertices;i++) { 3781 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3782 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3783 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3784 } 3785 } 3786 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 3787 } 3788 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 3789 if (restoreavr) { 3790 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 3791 } 3792 /* need to correct the solution */ 3793 if (need_benign_correction) { 3794 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3795 PetscScalar *marr; 3796 3797 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3798 if (lda_rhs != n_R) { 3799 for (i=0;i<n_vertices;i++) { 3800 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3801 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3802 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3803 } 3804 } else { 3805 for (i=0;i<n_vertices;i++) { 3806 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3807 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3808 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3809 } 3810 } 3811 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3812 } 3813 } else { 3814 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 3815 for (i=0;i<n_vertices;i++) { 3816 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3817 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3818 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3819 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3820 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3821 } 3822 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 3823 } 3824 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3825 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 3826 /* S_VV and S_CV */ 3827 if (n_constraints) { 3828 Mat B; 3829 3830 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3831 for (i=0;i<n_vertices;i++) { 3832 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3833 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3834 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3835 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3836 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3837 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3838 } 3839 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3840 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3841 ierr = MatDestroy(&B);CHKERRQ(ierr); 3842 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3843 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3844 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3845 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3846 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3847 ierr = MatDestroy(&B);CHKERRQ(ierr); 3848 } 3849 if (lda_rhs != n_R) { 3850 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3851 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3852 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3853 } 3854 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3855 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3856 if (need_benign_correction) { 3857 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3858 PetscScalar *marr,*sums; 3859 3860 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3861 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 3862 for (i=0;i<reuse_solver->benign_n;i++) { 3863 const PetscScalar *vals; 3864 const PetscInt *idxs,*idxs_zero; 3865 PetscInt n,j,nz; 3866 3867 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3868 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3869 for (j=0;j<n_vertices;j++) { 3870 PetscInt k; 3871 sums[j] = 0.; 3872 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3873 } 3874 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3875 for (j=0;j<n;j++) { 3876 PetscScalar val = vals[j]; 3877 PetscInt k; 3878 for (k=0;k<n_vertices;k++) { 3879 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3880 } 3881 } 3882 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3883 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3884 } 3885 ierr = PetscFree(sums);CHKERRQ(ierr); 3886 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 3887 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3888 } 3889 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3890 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3891 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3892 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3893 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3894 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3895 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3896 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3897 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3898 } else { 3899 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3900 } 3901 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3902 3903 /* coarse basis functions */ 3904 for (i=0;i<n_vertices;i++) { 3905 PetscScalar *y; 3906 3907 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3908 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3909 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3910 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3911 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3912 y[n_B*i+idx_V_B[i]] = 1.0; 3913 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3914 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3915 3916 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3917 PetscInt j; 3918 3919 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3920 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3921 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3922 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3923 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3924 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3925 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3926 } 3927 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3928 } 3929 /* if n_R == 0 the object is not destroyed */ 3930 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3931 } 3932 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3933 3934 if (n_constraints) { 3935 Mat B; 3936 3937 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3938 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3939 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3940 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3941 if (n_vertices) { 3942 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3943 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3944 } else { 3945 Mat S_VCt; 3946 3947 if (lda_rhs != n_R) { 3948 ierr = MatDestroy(&B);CHKERRQ(ierr); 3949 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3950 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3951 } 3952 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3953 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3954 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3955 } 3956 } 3957 ierr = MatDestroy(&B);CHKERRQ(ierr); 3958 /* coarse basis functions */ 3959 for (i=0;i<n_constraints;i++) { 3960 PetscScalar *y; 3961 3962 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3963 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3964 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3965 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3966 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3967 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3968 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3969 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3970 PetscInt j; 3971 3972 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3973 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3974 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3975 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3976 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3977 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3978 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3979 } 3980 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3981 } 3982 } 3983 if (n_constraints) { 3984 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3985 } 3986 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3987 3988 /* coarse matrix entries relative to B_0 */ 3989 if (pcbddc->benign_n) { 3990 Mat B0_B,B0_BPHI; 3991 IS is_dummy; 3992 PetscScalar *data; 3993 PetscInt j; 3994 3995 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3996 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3997 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3998 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3999 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4000 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 4001 for (j=0;j<pcbddc->benign_n;j++) { 4002 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4003 for (i=0;i<pcbddc->local_primal_size;i++) { 4004 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4005 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4006 } 4007 } 4008 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 4009 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4010 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4011 } 4012 4013 /* compute other basis functions for non-symmetric problems */ 4014 if (!pcbddc->symmetric_primal) { 4015 Mat B_V=NULL,B_C=NULL; 4016 PetscScalar *marray; 4017 4018 if (n_constraints) { 4019 Mat S_CCT,C_CRT; 4020 4021 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4022 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4023 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4024 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4025 if (n_vertices) { 4026 Mat S_VCT; 4027 4028 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4029 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4030 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4031 } 4032 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4033 } else { 4034 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4035 } 4036 if (n_vertices && n_R) { 4037 PetscScalar *av,*marray; 4038 const PetscInt *xadj,*adjncy; 4039 PetscInt n; 4040 PetscBool flg_row; 4041 4042 /* B_V = B_V - A_VR^T */ 4043 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4044 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4045 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4046 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4047 for (i=0;i<n;i++) { 4048 PetscInt j; 4049 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4050 } 4051 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4052 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4053 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4054 } 4055 4056 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4057 if (n_vertices) { 4058 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4059 for (i=0;i<n_vertices;i++) { 4060 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4061 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4062 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4063 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4064 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4065 } 4066 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4067 } 4068 if (B_C) { 4069 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4070 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4071 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4072 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4073 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4074 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4075 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4076 } 4077 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4078 } 4079 /* coarse basis functions */ 4080 for (i=0;i<pcbddc->local_primal_size;i++) { 4081 PetscScalar *y; 4082 4083 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4084 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4085 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4086 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4087 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4088 if (i<n_vertices) { 4089 y[n_B*i+idx_V_B[i]] = 1.0; 4090 } 4091 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4092 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4093 4094 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4095 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4096 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4097 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4098 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4099 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4100 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4101 } 4102 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4103 } 4104 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4105 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4106 } 4107 4108 /* free memory */ 4109 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4110 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4111 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4112 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4113 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4114 ierr = PetscFree(work);CHKERRQ(ierr); 4115 if (n_vertices) { 4116 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4117 } 4118 if (n_constraints) { 4119 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4120 } 4121 /* Checking coarse_sub_mat and coarse basis functios */ 4122 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4123 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4124 if (pcbddc->dbg_flag) { 4125 Mat coarse_sub_mat; 4126 Mat AUXMAT,TM1,TM2,TM3,TM4; 4127 Mat coarse_phi_D,coarse_phi_B; 4128 Mat coarse_psi_D,coarse_psi_B; 4129 Mat A_II,A_BB,A_IB,A_BI; 4130 Mat C_B,CPHI; 4131 IS is_dummy; 4132 Vec mones; 4133 MatType checkmattype=MATSEQAIJ; 4134 PetscReal real_value; 4135 4136 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4137 Mat A; 4138 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4139 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4140 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4141 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4142 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4143 ierr = MatDestroy(&A);CHKERRQ(ierr); 4144 } else { 4145 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4146 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4147 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4148 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4149 } 4150 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4151 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4152 if (!pcbddc->symmetric_primal) { 4153 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4154 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4155 } 4156 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4157 4158 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4159 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4160 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4161 if (!pcbddc->symmetric_primal) { 4162 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4163 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4164 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4165 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4166 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4167 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4168 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4169 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4170 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4171 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4172 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4173 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4174 } else { 4175 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4176 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4177 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4178 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4179 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4180 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4181 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4182 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4183 } 4184 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4185 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4186 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4187 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4188 if (pcbddc->benign_n) { 4189 Mat B0_B,B0_BPHI; 4190 PetscScalar *data,*data2; 4191 PetscInt j; 4192 4193 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4194 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4195 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4196 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4197 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4198 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4199 for (j=0;j<pcbddc->benign_n;j++) { 4200 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4201 for (i=0;i<pcbddc->local_primal_size;i++) { 4202 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4203 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4204 } 4205 } 4206 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4207 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4208 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4209 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4210 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4211 } 4212 #if 0 4213 { 4214 PetscViewer viewer; 4215 char filename[256]; 4216 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4217 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4218 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4219 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4220 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4221 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4222 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4223 if (save_change) { 4224 Mat phi_B; 4225 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 4226 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 4227 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 4228 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 4229 } else { 4230 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4231 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4232 } 4233 if (pcbddc->coarse_phi_D) { 4234 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4235 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4236 } 4237 if (pcbddc->coarse_psi_B) { 4238 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4239 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4240 } 4241 if (pcbddc->coarse_psi_D) { 4242 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4243 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4244 } 4245 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4246 } 4247 #endif 4248 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4249 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4250 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4251 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4252 4253 /* check constraints */ 4254 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4255 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4256 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4257 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4258 } else { 4259 PetscScalar *data; 4260 Mat tmat; 4261 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4262 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4263 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4264 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4265 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4266 } 4267 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4268 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4269 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4270 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4271 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4272 if (!pcbddc->symmetric_primal) { 4273 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4274 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4275 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4276 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4277 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4278 } 4279 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4280 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4281 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4282 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4283 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4284 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4285 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4286 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4287 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4288 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4289 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4290 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4291 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4292 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4293 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4294 if (!pcbddc->symmetric_primal) { 4295 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4296 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4297 } 4298 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4299 } 4300 /* get back data */ 4301 *coarse_submat_vals_n = coarse_submat_vals; 4302 PetscFunctionReturn(0); 4303 } 4304 4305 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4306 { 4307 Mat *work_mat; 4308 IS isrow_s,iscol_s; 4309 PetscBool rsorted,csorted; 4310 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4311 PetscErrorCode ierr; 4312 4313 PetscFunctionBegin; 4314 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4315 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4316 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4317 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4318 4319 if (!rsorted) { 4320 const PetscInt *idxs; 4321 PetscInt *idxs_sorted,i; 4322 4323 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4324 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4325 for (i=0;i<rsize;i++) { 4326 idxs_perm_r[i] = i; 4327 } 4328 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4329 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4330 for (i=0;i<rsize;i++) { 4331 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4332 } 4333 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4334 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4335 } else { 4336 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4337 isrow_s = isrow; 4338 } 4339 4340 if (!csorted) { 4341 if (isrow == iscol) { 4342 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4343 iscol_s = isrow_s; 4344 } else { 4345 const PetscInt *idxs; 4346 PetscInt *idxs_sorted,i; 4347 4348 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4349 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4350 for (i=0;i<csize;i++) { 4351 idxs_perm_c[i] = i; 4352 } 4353 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4354 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4355 for (i=0;i<csize;i++) { 4356 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4357 } 4358 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4359 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4360 } 4361 } else { 4362 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4363 iscol_s = iscol; 4364 } 4365 4366 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4367 4368 if (!rsorted || !csorted) { 4369 Mat new_mat; 4370 IS is_perm_r,is_perm_c; 4371 4372 if (!rsorted) { 4373 PetscInt *idxs_r,i; 4374 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4375 for (i=0;i<rsize;i++) { 4376 idxs_r[idxs_perm_r[i]] = i; 4377 } 4378 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4379 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4380 } else { 4381 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4382 } 4383 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4384 4385 if (!csorted) { 4386 if (isrow_s == iscol_s) { 4387 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4388 is_perm_c = is_perm_r; 4389 } else { 4390 PetscInt *idxs_c,i; 4391 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4392 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4393 for (i=0;i<csize;i++) { 4394 idxs_c[idxs_perm_c[i]] = i; 4395 } 4396 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4397 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4398 } 4399 } else { 4400 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4401 } 4402 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4403 4404 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4405 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4406 work_mat[0] = new_mat; 4407 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4408 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4409 } 4410 4411 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4412 *B = work_mat[0]; 4413 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4414 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4415 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4416 PetscFunctionReturn(0); 4417 } 4418 4419 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4420 { 4421 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4422 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4423 Mat new_mat,lA; 4424 IS is_local,is_global; 4425 PetscInt local_size; 4426 PetscBool isseqaij; 4427 PetscErrorCode ierr; 4428 4429 PetscFunctionBegin; 4430 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4431 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4432 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4433 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4434 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4435 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4436 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4437 4438 /* check */ 4439 if (pcbddc->dbg_flag) { 4440 Vec x,x_change; 4441 PetscReal error; 4442 4443 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4444 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4445 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4446 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4447 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4448 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4449 if (!pcbddc->change_interior) { 4450 const PetscScalar *x,*y,*v; 4451 PetscReal lerror = 0.; 4452 PetscInt i; 4453 4454 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4455 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4456 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4457 for (i=0;i<local_size;i++) 4458 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4459 lerror = PetscAbsScalar(x[i]-y[i]); 4460 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4461 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4462 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4463 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4464 if (error > PETSC_SMALL) { 4465 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4466 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4467 } else { 4468 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4469 } 4470 } 4471 } 4472 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4473 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4474 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4475 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4476 if (error > PETSC_SMALL) { 4477 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4478 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4479 } else { 4480 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4481 } 4482 } 4483 ierr = VecDestroy(&x);CHKERRQ(ierr); 4484 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4485 } 4486 4487 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4488 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 4489 4490 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4491 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4492 if (isseqaij) { 4493 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4494 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4495 if (lA) { 4496 Mat work; 4497 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4498 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4499 ierr = MatDestroy(&work);CHKERRQ(ierr); 4500 } 4501 } else { 4502 Mat work_mat; 4503 4504 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4505 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4506 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4507 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4508 if (lA) { 4509 Mat work; 4510 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4511 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4512 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4513 ierr = MatDestroy(&work);CHKERRQ(ierr); 4514 } 4515 } 4516 if (matis->A->symmetric_set) { 4517 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4518 #if !defined(PETSC_USE_COMPLEX) 4519 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4520 #endif 4521 } 4522 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4523 PetscFunctionReturn(0); 4524 } 4525 4526 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4527 { 4528 PC_IS* pcis = (PC_IS*)(pc->data); 4529 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4530 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4531 PetscInt *idx_R_local=NULL; 4532 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4533 PetscInt vbs,bs; 4534 PetscBT bitmask=NULL; 4535 PetscErrorCode ierr; 4536 4537 PetscFunctionBegin; 4538 /* 4539 No need to setup local scatters if 4540 - primal space is unchanged 4541 AND 4542 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4543 AND 4544 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4545 */ 4546 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4547 PetscFunctionReturn(0); 4548 } 4549 /* destroy old objects */ 4550 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4551 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4552 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4553 /* Set Non-overlapping dimensions */ 4554 n_B = pcis->n_B; 4555 n_D = pcis->n - n_B; 4556 n_vertices = pcbddc->n_vertices; 4557 4558 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4559 4560 /* create auxiliary bitmask and allocate workspace */ 4561 if (!sub_schurs || !sub_schurs->reuse_solver) { 4562 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4563 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4564 for (i=0;i<n_vertices;i++) { 4565 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4566 } 4567 4568 for (i=0, n_R=0; i<pcis->n; i++) { 4569 if (!PetscBTLookup(bitmask,i)) { 4570 idx_R_local[n_R++] = i; 4571 } 4572 } 4573 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4574 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4575 4576 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4577 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4578 } 4579 4580 /* Block code */ 4581 vbs = 1; 4582 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4583 if (bs>1 && !(n_vertices%bs)) { 4584 PetscBool is_blocked = PETSC_TRUE; 4585 PetscInt *vary; 4586 if (!sub_schurs || !sub_schurs->reuse_solver) { 4587 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4588 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4589 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4590 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4591 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4592 for (i=0; i<pcis->n/bs; i++) { 4593 if (vary[i]!=0 && vary[i]!=bs) { 4594 is_blocked = PETSC_FALSE; 4595 break; 4596 } 4597 } 4598 ierr = PetscFree(vary);CHKERRQ(ierr); 4599 } else { 4600 /* Verify directly the R set */ 4601 for (i=0; i<n_R/bs; i++) { 4602 PetscInt j,node=idx_R_local[bs*i]; 4603 for (j=1; j<bs; j++) { 4604 if (node != idx_R_local[bs*i+j]-j) { 4605 is_blocked = PETSC_FALSE; 4606 break; 4607 } 4608 } 4609 } 4610 } 4611 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4612 vbs = bs; 4613 for (i=0;i<n_R/vbs;i++) { 4614 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4615 } 4616 } 4617 } 4618 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4619 if (sub_schurs && sub_schurs->reuse_solver) { 4620 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4621 4622 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4623 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4624 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4625 reuse_solver->is_R = pcbddc->is_R_local; 4626 } else { 4627 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4628 } 4629 4630 /* print some info if requested */ 4631 if (pcbddc->dbg_flag) { 4632 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4633 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4634 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4635 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4636 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4637 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4638 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4639 } 4640 4641 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4642 if (!sub_schurs || !sub_schurs->reuse_solver) { 4643 IS is_aux1,is_aux2; 4644 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4645 4646 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4647 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4648 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4649 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4650 for (i=0; i<n_D; i++) { 4651 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4652 } 4653 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4654 for (i=0, j=0; i<n_R; i++) { 4655 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4656 aux_array1[j++] = i; 4657 } 4658 } 4659 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4660 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4661 for (i=0, j=0; i<n_B; i++) { 4662 if (!PetscBTLookup(bitmask,is_indices[i])) { 4663 aux_array2[j++] = i; 4664 } 4665 } 4666 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4667 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4668 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4669 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4670 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4671 4672 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4673 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4674 for (i=0, j=0; i<n_R; i++) { 4675 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4676 aux_array1[j++] = i; 4677 } 4678 } 4679 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4680 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4681 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4682 } 4683 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4684 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4685 } else { 4686 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4687 IS tis; 4688 PetscInt schur_size; 4689 4690 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4691 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4692 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4693 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4694 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4695 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4696 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4697 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4698 } 4699 } 4700 PetscFunctionReturn(0); 4701 } 4702 4703 4704 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4705 { 4706 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4707 PC_IS *pcis = (PC_IS*)pc->data; 4708 PC pc_temp; 4709 Mat A_RR; 4710 MatReuse reuse; 4711 PetscScalar m_one = -1.0; 4712 PetscReal value; 4713 PetscInt n_D,n_R; 4714 PetscBool check_corr[2],issbaij; 4715 PetscErrorCode ierr; 4716 /* prefixes stuff */ 4717 char dir_prefix[256],neu_prefix[256],str_level[16]; 4718 size_t len; 4719 4720 PetscFunctionBegin; 4721 4722 /* compute prefixes */ 4723 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4724 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4725 if (!pcbddc->current_level) { 4726 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4727 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4728 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4729 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4730 } else { 4731 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4732 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4733 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4734 len -= 15; /* remove "pc_bddc_coarse_" */ 4735 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4736 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4737 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4738 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4739 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4740 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4741 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4742 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4743 } 4744 4745 /* DIRICHLET PROBLEM */ 4746 if (dirichlet) { 4747 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4748 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4749 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4750 if (pcbddc->dbg_flag) { 4751 Mat A_IIn; 4752 4753 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4754 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4755 pcis->A_II = A_IIn; 4756 } 4757 } 4758 if (pcbddc->local_mat->symmetric_set) { 4759 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4760 } 4761 /* Matrix for Dirichlet problem is pcis->A_II */ 4762 n_D = pcis->n - pcis->n_B; 4763 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4764 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4765 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4766 /* default */ 4767 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4768 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4769 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4770 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4771 if (issbaij) { 4772 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4773 } else { 4774 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4775 } 4776 /* Allow user's customization */ 4777 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4778 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4779 } 4780 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4781 if (sub_schurs && sub_schurs->reuse_solver) { 4782 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4783 4784 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4785 } 4786 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4787 if (!n_D) { 4788 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4789 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4790 } 4791 /* Set Up KSP for Dirichlet problem of BDDC */ 4792 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4793 /* set ksp_D into pcis data */ 4794 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4795 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4796 pcis->ksp_D = pcbddc->ksp_D; 4797 } 4798 4799 /* NEUMANN PROBLEM */ 4800 A_RR = 0; 4801 if (neumann) { 4802 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4803 PetscInt ibs,mbs; 4804 PetscBool issbaij; 4805 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4806 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4807 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4808 if (pcbddc->ksp_R) { /* already created ksp */ 4809 PetscInt nn_R; 4810 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4811 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4812 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4813 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4814 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4815 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4816 reuse = MAT_INITIAL_MATRIX; 4817 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4818 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4819 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4820 reuse = MAT_INITIAL_MATRIX; 4821 } else { /* safe to reuse the matrix */ 4822 reuse = MAT_REUSE_MATRIX; 4823 } 4824 } 4825 /* last check */ 4826 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4827 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4828 reuse = MAT_INITIAL_MATRIX; 4829 } 4830 } else { /* first time, so we need to create the matrix */ 4831 reuse = MAT_INITIAL_MATRIX; 4832 } 4833 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4834 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4835 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4836 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4837 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4838 if (matis->A == pcbddc->local_mat) { 4839 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4840 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4841 } else { 4842 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4843 } 4844 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4845 if (matis->A == pcbddc->local_mat) { 4846 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4847 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4848 } else { 4849 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4850 } 4851 } 4852 /* extract A_RR */ 4853 if (sub_schurs && sub_schurs->reuse_solver) { 4854 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4855 4856 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4857 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4858 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4859 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4860 } else { 4861 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4862 } 4863 } else { 4864 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4865 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4866 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4867 } 4868 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4869 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4870 } 4871 if (pcbddc->local_mat->symmetric_set) { 4872 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4873 } 4874 if (!pcbddc->ksp_R) { /* create object if not present */ 4875 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4876 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4877 /* default */ 4878 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4879 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4880 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4881 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4882 if (issbaij) { 4883 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4884 } else { 4885 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4886 } 4887 /* Allow user's customization */ 4888 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4889 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4890 } 4891 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4892 if (!n_R) { 4893 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4894 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4895 } 4896 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4897 /* Reuse solver if it is present */ 4898 if (sub_schurs && sub_schurs->reuse_solver && sub_schurs->A == pcbddc->local_mat) { 4899 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4900 4901 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4902 } 4903 /* Set Up KSP for Neumann problem of BDDC */ 4904 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4905 } 4906 4907 if (pcbddc->dbg_flag) { 4908 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4909 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4910 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4911 } 4912 4913 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4914 check_corr[0] = check_corr[1] = PETSC_FALSE; 4915 if (pcbddc->NullSpace_corr[0]) { 4916 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4917 } 4918 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4919 check_corr[0] = PETSC_TRUE; 4920 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4921 } 4922 if (neumann && pcbddc->NullSpace_corr[2]) { 4923 check_corr[1] = PETSC_TRUE; 4924 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4925 } 4926 4927 /* check Dirichlet and Neumann solvers */ 4928 if (pcbddc->dbg_flag) { 4929 if (dirichlet) { /* Dirichlet */ 4930 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4931 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4932 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4933 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4934 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4935 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4936 if (check_corr[0]) { 4937 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4938 } 4939 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4940 } 4941 if (neumann) { /* Neumann */ 4942 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4943 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4944 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4945 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4946 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4947 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4948 if (check_corr[1]) { 4949 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4950 } 4951 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4952 } 4953 } 4954 /* free Neumann problem's matrix */ 4955 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4956 PetscFunctionReturn(0); 4957 } 4958 4959 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4960 { 4961 PetscErrorCode ierr; 4962 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4963 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4964 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4965 4966 PetscFunctionBegin; 4967 if (!reuse_solver) { 4968 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4969 } 4970 if (!pcbddc->switch_static) { 4971 if (applytranspose && pcbddc->local_auxmat1) { 4972 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4973 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4974 } 4975 if (!reuse_solver) { 4976 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4977 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4978 } else { 4979 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4980 4981 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4982 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4983 } 4984 } else { 4985 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4986 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4987 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4988 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4989 if (applytranspose && pcbddc->local_auxmat1) { 4990 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4991 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4992 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4993 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4994 } 4995 } 4996 if (!reuse_solver || pcbddc->switch_static) { 4997 if (applytranspose) { 4998 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4999 } else { 5000 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5001 } 5002 } else { 5003 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5004 5005 if (applytranspose) { 5006 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5007 } else { 5008 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5009 } 5010 } 5011 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5012 if (!pcbddc->switch_static) { 5013 if (!reuse_solver) { 5014 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5015 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5016 } else { 5017 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5018 5019 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5020 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5021 } 5022 if (!applytranspose && pcbddc->local_auxmat1) { 5023 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5024 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5025 } 5026 } else { 5027 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5028 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5029 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5030 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5031 if (!applytranspose && pcbddc->local_auxmat1) { 5032 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5033 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5034 } 5035 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5036 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5037 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5038 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5039 } 5040 PetscFunctionReturn(0); 5041 } 5042 5043 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5044 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5045 { 5046 PetscErrorCode ierr; 5047 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5048 PC_IS* pcis = (PC_IS*) (pc->data); 5049 const PetscScalar zero = 0.0; 5050 5051 PetscFunctionBegin; 5052 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5053 if (!pcbddc->benign_apply_coarse_only) { 5054 if (applytranspose) { 5055 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5056 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5057 } else { 5058 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5059 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5060 } 5061 } else { 5062 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5063 } 5064 5065 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5066 if (pcbddc->benign_n) { 5067 PetscScalar *array; 5068 PetscInt j; 5069 5070 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5071 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5072 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5073 } 5074 5075 /* start communications from local primal nodes to rhs of coarse solver */ 5076 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5077 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5078 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5079 5080 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5081 if (pcbddc->coarse_ksp) { 5082 Mat coarse_mat; 5083 Vec rhs,sol; 5084 MatNullSpace nullsp; 5085 PetscBool isbddc = PETSC_FALSE; 5086 5087 if (pcbddc->benign_have_null) { 5088 PC coarse_pc; 5089 5090 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5091 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5092 /* we need to propagate to coarser levels the need for a possible benign correction */ 5093 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5094 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5095 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5096 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5097 } 5098 } 5099 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5100 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5101 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5102 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5103 if (nullsp) { 5104 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5105 } 5106 if (applytranspose) { 5107 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5108 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5109 } else { 5110 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5111 PC coarse_pc; 5112 5113 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5114 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5115 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5116 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5117 } else { 5118 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5119 } 5120 } 5121 /* we don't need the benign correction at coarser levels anymore */ 5122 if (pcbddc->benign_have_null && isbddc) { 5123 PC coarse_pc; 5124 PC_BDDC* coarsepcbddc; 5125 5126 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5127 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5128 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5129 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5130 } 5131 if (nullsp) { 5132 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5133 } 5134 } 5135 5136 /* Local solution on R nodes */ 5137 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5138 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5139 } 5140 /* communications from coarse sol to local primal nodes */ 5141 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5142 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5143 5144 /* Sum contributions from the two levels */ 5145 if (!pcbddc->benign_apply_coarse_only) { 5146 if (applytranspose) { 5147 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5148 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5149 } else { 5150 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5151 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5152 } 5153 /* store p0 */ 5154 if (pcbddc->benign_n) { 5155 PetscScalar *array; 5156 PetscInt j; 5157 5158 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5159 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5160 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5161 } 5162 } else { /* expand the coarse solution */ 5163 if (applytranspose) { 5164 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5165 } else { 5166 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5167 } 5168 } 5169 PetscFunctionReturn(0); 5170 } 5171 5172 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5173 { 5174 PetscErrorCode ierr; 5175 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5176 PetscScalar *array; 5177 Vec from,to; 5178 5179 PetscFunctionBegin; 5180 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5181 from = pcbddc->coarse_vec; 5182 to = pcbddc->vec1_P; 5183 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5184 Vec tvec; 5185 5186 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5187 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5188 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5189 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5190 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5191 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5192 } 5193 } else { /* from local to global -> put data in coarse right hand side */ 5194 from = pcbddc->vec1_P; 5195 to = pcbddc->coarse_vec; 5196 } 5197 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5198 PetscFunctionReturn(0); 5199 } 5200 5201 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5202 { 5203 PetscErrorCode ierr; 5204 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5205 PetscScalar *array; 5206 Vec from,to; 5207 5208 PetscFunctionBegin; 5209 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5210 from = pcbddc->coarse_vec; 5211 to = pcbddc->vec1_P; 5212 } else { /* from local to global -> put data in coarse right hand side */ 5213 from = pcbddc->vec1_P; 5214 to = pcbddc->coarse_vec; 5215 } 5216 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5217 if (smode == SCATTER_FORWARD) { 5218 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5219 Vec tvec; 5220 5221 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5222 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5223 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5224 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5225 } 5226 } else { 5227 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5228 ierr = VecResetArray(from);CHKERRQ(ierr); 5229 } 5230 } 5231 PetscFunctionReturn(0); 5232 } 5233 5234 /* uncomment for testing purposes */ 5235 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5236 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5237 { 5238 PetscErrorCode ierr; 5239 PC_IS* pcis = (PC_IS*)(pc->data); 5240 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5241 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5242 /* one and zero */ 5243 PetscScalar one=1.0,zero=0.0; 5244 /* space to store constraints and their local indices */ 5245 PetscScalar *constraints_data; 5246 PetscInt *constraints_idxs,*constraints_idxs_B; 5247 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5248 PetscInt *constraints_n; 5249 /* iterators */ 5250 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5251 /* BLAS integers */ 5252 PetscBLASInt lwork,lierr; 5253 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5254 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5255 /* reuse */ 5256 PetscInt olocal_primal_size,olocal_primal_size_cc; 5257 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5258 /* change of basis */ 5259 PetscBool qr_needed; 5260 PetscBT change_basis,qr_needed_idx; 5261 /* auxiliary stuff */ 5262 PetscInt *nnz,*is_indices; 5263 PetscInt ncc; 5264 /* some quantities */ 5265 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5266 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5267 5268 PetscFunctionBegin; 5269 /* Destroy Mat objects computed previously */ 5270 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5271 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5272 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5273 /* save info on constraints from previous setup (if any) */ 5274 olocal_primal_size = pcbddc->local_primal_size; 5275 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5276 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5277 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5278 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5279 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5280 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5281 5282 if (!pcbddc->adaptive_selection) { 5283 IS ISForVertices,*ISForFaces,*ISForEdges; 5284 MatNullSpace nearnullsp; 5285 const Vec *nearnullvecs; 5286 Vec *localnearnullsp; 5287 PetscScalar *array; 5288 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5289 PetscBool nnsp_has_cnst; 5290 /* LAPACK working arrays for SVD or POD */ 5291 PetscBool skip_lapack,boolforchange; 5292 PetscScalar *work; 5293 PetscReal *singular_vals; 5294 #if defined(PETSC_USE_COMPLEX) 5295 PetscReal *rwork; 5296 #endif 5297 #if defined(PETSC_MISSING_LAPACK_GESVD) 5298 PetscScalar *temp_basis,*correlation_mat; 5299 #else 5300 PetscBLASInt dummy_int=1; 5301 PetscScalar dummy_scalar=1.; 5302 #endif 5303 5304 /* Get index sets for faces, edges and vertices from graph */ 5305 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5306 /* print some info */ 5307 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5308 PetscInt nv; 5309 5310 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5311 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5312 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5313 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5314 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5315 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5316 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5317 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5318 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5319 } 5320 5321 /* free unneeded index sets */ 5322 if (!pcbddc->use_vertices) { 5323 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5324 } 5325 if (!pcbddc->use_edges) { 5326 for (i=0;i<n_ISForEdges;i++) { 5327 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5328 } 5329 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5330 n_ISForEdges = 0; 5331 } 5332 if (!pcbddc->use_faces) { 5333 for (i=0;i<n_ISForFaces;i++) { 5334 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5335 } 5336 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5337 n_ISForFaces = 0; 5338 } 5339 5340 /* check if near null space is attached to global mat */ 5341 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5342 if (nearnullsp) { 5343 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5344 /* remove any stored info */ 5345 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5346 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5347 /* store information for BDDC solver reuse */ 5348 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5349 pcbddc->onearnullspace = nearnullsp; 5350 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5351 for (i=0;i<nnsp_size;i++) { 5352 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5353 } 5354 } else { /* if near null space is not provided BDDC uses constants by default */ 5355 nnsp_size = 0; 5356 nnsp_has_cnst = PETSC_TRUE; 5357 } 5358 /* get max number of constraints on a single cc */ 5359 max_constraints = nnsp_size; 5360 if (nnsp_has_cnst) max_constraints++; 5361 5362 /* 5363 Evaluate maximum storage size needed by the procedure 5364 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5365 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5366 There can be multiple constraints per connected component 5367 */ 5368 n_vertices = 0; 5369 if (ISForVertices) { 5370 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5371 } 5372 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5373 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5374 5375 total_counts = n_ISForFaces+n_ISForEdges; 5376 total_counts *= max_constraints; 5377 total_counts += n_vertices; 5378 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5379 5380 total_counts = 0; 5381 max_size_of_constraint = 0; 5382 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5383 IS used_is; 5384 if (i<n_ISForEdges) { 5385 used_is = ISForEdges[i]; 5386 } else { 5387 used_is = ISForFaces[i-n_ISForEdges]; 5388 } 5389 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5390 total_counts += j; 5391 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5392 } 5393 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5394 5395 /* get local part of global near null space vectors */ 5396 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5397 for (k=0;k<nnsp_size;k++) { 5398 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5399 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5400 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5401 } 5402 5403 /* whether or not to skip lapack calls */ 5404 skip_lapack = PETSC_TRUE; 5405 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5406 5407 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5408 if (!skip_lapack) { 5409 PetscScalar temp_work; 5410 5411 #if defined(PETSC_MISSING_LAPACK_GESVD) 5412 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5413 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5414 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5415 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5416 #if defined(PETSC_USE_COMPLEX) 5417 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5418 #endif 5419 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5420 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5421 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5422 lwork = -1; 5423 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5424 #if !defined(PETSC_USE_COMPLEX) 5425 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5426 #else 5427 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5428 #endif 5429 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5430 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5431 #else /* on missing GESVD */ 5432 /* SVD */ 5433 PetscInt max_n,min_n; 5434 max_n = max_size_of_constraint; 5435 min_n = max_constraints; 5436 if (max_size_of_constraint < max_constraints) { 5437 min_n = max_size_of_constraint; 5438 max_n = max_constraints; 5439 } 5440 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5441 #if defined(PETSC_USE_COMPLEX) 5442 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5443 #endif 5444 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5445 lwork = -1; 5446 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5447 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5448 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5449 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5450 #if !defined(PETSC_USE_COMPLEX) 5451 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5452 #else 5453 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5454 #endif 5455 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5456 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5457 #endif /* on missing GESVD */ 5458 /* Allocate optimal workspace */ 5459 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5460 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5461 } 5462 /* Now we can loop on constraining sets */ 5463 total_counts = 0; 5464 constraints_idxs_ptr[0] = 0; 5465 constraints_data_ptr[0] = 0; 5466 /* vertices */ 5467 if (n_vertices) { 5468 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5469 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5470 for (i=0;i<n_vertices;i++) { 5471 constraints_n[total_counts] = 1; 5472 constraints_data[total_counts] = 1.0; 5473 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5474 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5475 total_counts++; 5476 } 5477 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5478 n_vertices = total_counts; 5479 } 5480 5481 /* edges and faces */ 5482 total_counts_cc = total_counts; 5483 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5484 IS used_is; 5485 PetscBool idxs_copied = PETSC_FALSE; 5486 5487 if (ncc<n_ISForEdges) { 5488 used_is = ISForEdges[ncc]; 5489 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5490 } else { 5491 used_is = ISForFaces[ncc-n_ISForEdges]; 5492 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5493 } 5494 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5495 5496 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5497 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5498 /* change of basis should not be performed on local periodic nodes */ 5499 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5500 if (nnsp_has_cnst) { 5501 PetscScalar quad_value; 5502 5503 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5504 idxs_copied = PETSC_TRUE; 5505 5506 if (!pcbddc->use_nnsp_true) { 5507 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5508 } else { 5509 quad_value = 1.0; 5510 } 5511 for (j=0;j<size_of_constraint;j++) { 5512 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5513 } 5514 temp_constraints++; 5515 total_counts++; 5516 } 5517 for (k=0;k<nnsp_size;k++) { 5518 PetscReal real_value; 5519 PetscScalar *ptr_to_data; 5520 5521 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5522 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5523 for (j=0;j<size_of_constraint;j++) { 5524 ptr_to_data[j] = array[is_indices[j]]; 5525 } 5526 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5527 /* check if array is null on the connected component */ 5528 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5529 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5530 if (real_value > 0.0) { /* keep indices and values */ 5531 temp_constraints++; 5532 total_counts++; 5533 if (!idxs_copied) { 5534 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5535 idxs_copied = PETSC_TRUE; 5536 } 5537 } 5538 } 5539 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5540 valid_constraints = temp_constraints; 5541 if (!pcbddc->use_nnsp_true && temp_constraints) { 5542 if (temp_constraints == 1) { /* just normalize the constraint */ 5543 PetscScalar norm,*ptr_to_data; 5544 5545 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5546 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5547 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5548 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5549 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5550 } else { /* perform SVD */ 5551 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5552 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5553 5554 #if defined(PETSC_MISSING_LAPACK_GESVD) 5555 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5556 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5557 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5558 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5559 from that computed using LAPACKgesvd 5560 -> This is due to a different computation of eigenvectors in LAPACKheev 5561 -> The quality of the POD-computed basis will be the same */ 5562 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5563 /* Store upper triangular part of correlation matrix */ 5564 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5565 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5566 for (j=0;j<temp_constraints;j++) { 5567 for (k=0;k<j+1;k++) { 5568 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5569 } 5570 } 5571 /* compute eigenvalues and eigenvectors of correlation matrix */ 5572 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5573 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5574 #if !defined(PETSC_USE_COMPLEX) 5575 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5576 #else 5577 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5578 #endif 5579 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5580 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5581 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5582 j = 0; 5583 while (j < temp_constraints && singular_vals[j] < tol) j++; 5584 total_counts = total_counts-j; 5585 valid_constraints = temp_constraints-j; 5586 /* scale and copy POD basis into used quadrature memory */ 5587 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5588 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5589 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5590 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5591 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5592 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5593 if (j<temp_constraints) { 5594 PetscInt ii; 5595 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5596 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5597 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5598 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5599 for (k=0;k<temp_constraints-j;k++) { 5600 for (ii=0;ii<size_of_constraint;ii++) { 5601 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5602 } 5603 } 5604 } 5605 #else /* on missing GESVD */ 5606 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5607 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5608 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5609 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5610 #if !defined(PETSC_USE_COMPLEX) 5611 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5612 #else 5613 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5614 #endif 5615 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5616 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5617 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5618 k = temp_constraints; 5619 if (k > size_of_constraint) k = size_of_constraint; 5620 j = 0; 5621 while (j < k && singular_vals[k-j-1] < tol) j++; 5622 valid_constraints = k-j; 5623 total_counts = total_counts-temp_constraints+valid_constraints; 5624 #endif /* on missing GESVD */ 5625 } 5626 } 5627 /* update pointers information */ 5628 if (valid_constraints) { 5629 constraints_n[total_counts_cc] = valid_constraints; 5630 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5631 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5632 /* set change_of_basis flag */ 5633 if (boolforchange) { 5634 PetscBTSet(change_basis,total_counts_cc); 5635 } 5636 total_counts_cc++; 5637 } 5638 } 5639 /* free workspace */ 5640 if (!skip_lapack) { 5641 ierr = PetscFree(work);CHKERRQ(ierr); 5642 #if defined(PETSC_USE_COMPLEX) 5643 ierr = PetscFree(rwork);CHKERRQ(ierr); 5644 #endif 5645 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5646 #if defined(PETSC_MISSING_LAPACK_GESVD) 5647 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5648 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5649 #endif 5650 } 5651 for (k=0;k<nnsp_size;k++) { 5652 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5653 } 5654 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5655 /* free index sets of faces, edges and vertices */ 5656 for (i=0;i<n_ISForFaces;i++) { 5657 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5658 } 5659 if (n_ISForFaces) { 5660 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5661 } 5662 for (i=0;i<n_ISForEdges;i++) { 5663 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5664 } 5665 if (n_ISForEdges) { 5666 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5667 } 5668 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5669 } else { 5670 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5671 5672 total_counts = 0; 5673 n_vertices = 0; 5674 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5675 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5676 } 5677 max_constraints = 0; 5678 total_counts_cc = 0; 5679 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5680 total_counts += pcbddc->adaptive_constraints_n[i]; 5681 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5682 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5683 } 5684 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5685 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5686 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5687 constraints_data = pcbddc->adaptive_constraints_data; 5688 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5689 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5690 total_counts_cc = 0; 5691 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5692 if (pcbddc->adaptive_constraints_n[i]) { 5693 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5694 } 5695 } 5696 #if 0 5697 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5698 for (i=0;i<total_counts_cc;i++) { 5699 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5700 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5701 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5702 printf(" %d",constraints_idxs[j]); 5703 } 5704 printf("\n"); 5705 printf("number of cc: %d\n",constraints_n[i]); 5706 } 5707 for (i=0;i<n_vertices;i++) { 5708 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5709 } 5710 for (i=0;i<sub_schurs->n_subs;i++) { 5711 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5712 } 5713 #endif 5714 5715 max_size_of_constraint = 0; 5716 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5717 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5718 /* Change of basis */ 5719 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5720 if (pcbddc->use_change_of_basis) { 5721 for (i=0;i<sub_schurs->n_subs;i++) { 5722 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5723 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5724 } 5725 } 5726 } 5727 } 5728 pcbddc->local_primal_size = total_counts; 5729 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5730 5731 /* map constraints_idxs in boundary numbering */ 5732 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5733 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5734 5735 /* Create constraint matrix */ 5736 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5737 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5738 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5739 5740 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5741 /* determine if a QR strategy is needed for change of basis */ 5742 qr_needed = PETSC_FALSE; 5743 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5744 total_primal_vertices=0; 5745 pcbddc->local_primal_size_cc = 0; 5746 for (i=0;i<total_counts_cc;i++) { 5747 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5748 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5749 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5750 pcbddc->local_primal_size_cc += 1; 5751 } else if (PetscBTLookup(change_basis,i)) { 5752 for (k=0;k<constraints_n[i];k++) { 5753 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5754 } 5755 pcbddc->local_primal_size_cc += constraints_n[i]; 5756 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5757 PetscBTSet(qr_needed_idx,i); 5758 qr_needed = PETSC_TRUE; 5759 } 5760 } else { 5761 pcbddc->local_primal_size_cc += 1; 5762 } 5763 } 5764 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5765 pcbddc->n_vertices = total_primal_vertices; 5766 /* permute indices in order to have a sorted set of vertices */ 5767 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5768 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5769 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5770 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5771 5772 /* nonzero structure of constraint matrix */ 5773 /* and get reference dof for local constraints */ 5774 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5775 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5776 5777 j = total_primal_vertices; 5778 total_counts = total_primal_vertices; 5779 cum = total_primal_vertices; 5780 for (i=n_vertices;i<total_counts_cc;i++) { 5781 if (!PetscBTLookup(change_basis,i)) { 5782 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5783 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5784 cum++; 5785 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5786 for (k=0;k<constraints_n[i];k++) { 5787 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5788 nnz[j+k] = size_of_constraint; 5789 } 5790 j += constraints_n[i]; 5791 } 5792 } 5793 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5794 ierr = PetscFree(nnz);CHKERRQ(ierr); 5795 5796 /* set values in constraint matrix */ 5797 for (i=0;i<total_primal_vertices;i++) { 5798 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5799 } 5800 total_counts = total_primal_vertices; 5801 for (i=n_vertices;i<total_counts_cc;i++) { 5802 if (!PetscBTLookup(change_basis,i)) { 5803 PetscInt *cols; 5804 5805 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5806 cols = constraints_idxs+constraints_idxs_ptr[i]; 5807 for (k=0;k<constraints_n[i];k++) { 5808 PetscInt row = total_counts+k; 5809 PetscScalar *vals; 5810 5811 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5812 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5813 } 5814 total_counts += constraints_n[i]; 5815 } 5816 } 5817 /* assembling */ 5818 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5819 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5820 5821 /* 5822 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5823 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5824 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5825 */ 5826 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5827 if (pcbddc->use_change_of_basis) { 5828 /* dual and primal dofs on a single cc */ 5829 PetscInt dual_dofs,primal_dofs; 5830 /* working stuff for GEQRF */ 5831 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5832 PetscBLASInt lqr_work; 5833 /* working stuff for UNGQR */ 5834 PetscScalar *gqr_work,lgqr_work_t; 5835 PetscBLASInt lgqr_work; 5836 /* working stuff for TRTRS */ 5837 PetscScalar *trs_rhs; 5838 PetscBLASInt Blas_NRHS; 5839 /* pointers for values insertion into change of basis matrix */ 5840 PetscInt *start_rows,*start_cols; 5841 PetscScalar *start_vals; 5842 /* working stuff for values insertion */ 5843 PetscBT is_primal; 5844 PetscInt *aux_primal_numbering_B; 5845 /* matrix sizes */ 5846 PetscInt global_size,local_size; 5847 /* temporary change of basis */ 5848 Mat localChangeOfBasisMatrix; 5849 /* extra space for debugging */ 5850 PetscScalar *dbg_work; 5851 5852 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5853 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5854 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5855 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5856 /* nonzeros for local mat */ 5857 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5858 if (!pcbddc->benign_change || pcbddc->fake_change) { 5859 for (i=0;i<pcis->n;i++) nnz[i]=1; 5860 } else { 5861 const PetscInt *ii; 5862 PetscInt n; 5863 PetscBool flg_row; 5864 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5865 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5866 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5867 } 5868 for (i=n_vertices;i<total_counts_cc;i++) { 5869 if (PetscBTLookup(change_basis,i)) { 5870 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5871 if (PetscBTLookup(qr_needed_idx,i)) { 5872 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5873 } else { 5874 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5875 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5876 } 5877 } 5878 } 5879 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5880 ierr = PetscFree(nnz);CHKERRQ(ierr); 5881 /* Set interior change in the matrix */ 5882 if (!pcbddc->benign_change || pcbddc->fake_change) { 5883 for (i=0;i<pcis->n;i++) { 5884 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5885 } 5886 } else { 5887 const PetscInt *ii,*jj; 5888 PetscScalar *aa; 5889 PetscInt n; 5890 PetscBool flg_row; 5891 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5892 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5893 for (i=0;i<n;i++) { 5894 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5895 } 5896 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5897 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5898 } 5899 5900 if (pcbddc->dbg_flag) { 5901 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5902 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5903 } 5904 5905 5906 /* Now we loop on the constraints which need a change of basis */ 5907 /* 5908 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5909 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5910 5911 Basic blocks of change of basis matrix T computed by 5912 5913 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5914 5915 | 1 0 ... 0 s_1/S | 5916 | 0 1 ... 0 s_2/S | 5917 | ... | 5918 | 0 ... 1 s_{n-1}/S | 5919 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5920 5921 with S = \sum_{i=1}^n s_i^2 5922 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5923 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5924 5925 - QR decomposition of constraints otherwise 5926 */ 5927 if (qr_needed) { 5928 /* space to store Q */ 5929 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5930 /* array to store scaling factors for reflectors */ 5931 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5932 /* first we issue queries for optimal work */ 5933 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5934 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5935 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5936 lqr_work = -1; 5937 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5938 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5939 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5940 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5941 lgqr_work = -1; 5942 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5943 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5944 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5945 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5946 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5947 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5948 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5949 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5950 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5951 /* array to store rhs and solution of triangular solver */ 5952 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5953 /* allocating workspace for check */ 5954 if (pcbddc->dbg_flag) { 5955 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5956 } 5957 } 5958 /* array to store whether a node is primal or not */ 5959 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5960 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5961 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5962 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5963 for (i=0;i<total_primal_vertices;i++) { 5964 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5965 } 5966 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5967 5968 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5969 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5970 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5971 if (PetscBTLookup(change_basis,total_counts)) { 5972 /* get constraint info */ 5973 primal_dofs = constraints_n[total_counts]; 5974 dual_dofs = size_of_constraint-primal_dofs; 5975 5976 if (pcbddc->dbg_flag) { 5977 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5978 } 5979 5980 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5981 5982 /* copy quadrature constraints for change of basis check */ 5983 if (pcbddc->dbg_flag) { 5984 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5985 } 5986 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5987 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5988 5989 /* compute QR decomposition of constraints */ 5990 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5991 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5992 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5993 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5994 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5995 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5996 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5997 5998 /* explictly compute R^-T */ 5999 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 6000 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6001 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6002 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6003 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6004 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6005 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6006 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6007 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6008 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6009 6010 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 6011 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6012 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6013 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6014 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6015 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6016 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6017 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 6018 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6019 6020 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6021 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6022 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6023 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6024 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6025 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6026 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6027 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6028 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6029 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6030 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6031 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6032 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6033 6034 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6035 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6036 /* insert cols for primal dofs */ 6037 for (j=0;j<primal_dofs;j++) { 6038 start_vals = &qr_basis[j*size_of_constraint]; 6039 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6040 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6041 } 6042 /* insert cols for dual dofs */ 6043 for (j=0,k=0;j<dual_dofs;k++) { 6044 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6045 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6046 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6047 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6048 j++; 6049 } 6050 } 6051 6052 /* check change of basis */ 6053 if (pcbddc->dbg_flag) { 6054 PetscInt ii,jj; 6055 PetscBool valid_qr=PETSC_TRUE; 6056 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6057 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6058 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6059 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6060 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6061 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6062 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6063 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6064 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6065 for (jj=0;jj<size_of_constraint;jj++) { 6066 for (ii=0;ii<primal_dofs;ii++) { 6067 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6068 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 6069 } 6070 } 6071 if (!valid_qr) { 6072 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6073 for (jj=0;jj<size_of_constraint;jj++) { 6074 for (ii=0;ii<primal_dofs;ii++) { 6075 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6076 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6077 } 6078 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 6079 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6080 } 6081 } 6082 } 6083 } else { 6084 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6085 } 6086 } 6087 } else { /* simple transformation block */ 6088 PetscInt row,col; 6089 PetscScalar val,norm; 6090 6091 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6092 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6093 for (j=0;j<size_of_constraint;j++) { 6094 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6095 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6096 if (!PetscBTLookup(is_primal,row_B)) { 6097 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6098 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6099 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6100 } else { 6101 for (k=0;k<size_of_constraint;k++) { 6102 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6103 if (row != col) { 6104 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6105 } else { 6106 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6107 } 6108 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6109 } 6110 } 6111 } 6112 if (pcbddc->dbg_flag) { 6113 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6114 } 6115 } 6116 } else { 6117 if (pcbddc->dbg_flag) { 6118 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6119 } 6120 } 6121 } 6122 6123 /* free workspace */ 6124 if (qr_needed) { 6125 if (pcbddc->dbg_flag) { 6126 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6127 } 6128 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6129 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6130 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6131 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6132 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6133 } 6134 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6135 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6136 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6137 6138 /* assembling of global change of variable */ 6139 if (!pcbddc->fake_change) { 6140 Mat tmat; 6141 PetscInt bs; 6142 6143 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6144 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6145 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6146 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6147 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6148 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6149 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6150 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6151 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6152 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6153 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6154 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6155 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6156 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6157 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6158 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6159 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6160 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6161 6162 /* check */ 6163 if (pcbddc->dbg_flag) { 6164 PetscReal error; 6165 Vec x,x_change; 6166 6167 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6168 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6169 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6170 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6171 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6172 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6173 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6174 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6175 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6176 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6177 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6178 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6179 if (error > PETSC_SMALL) { 6180 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6181 } 6182 ierr = VecDestroy(&x);CHKERRQ(ierr); 6183 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6184 } 6185 /* adapt sub_schurs computed (if any) */ 6186 if (pcbddc->use_deluxe_scaling) { 6187 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6188 6189 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 6190 if (sub_schurs && sub_schurs->S_Ej_all) { 6191 Mat S_new,tmat; 6192 IS is_all_N,is_V_Sall = NULL; 6193 6194 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6195 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6196 if (pcbddc->deluxe_zerorows) { 6197 ISLocalToGlobalMapping NtoSall; 6198 IS is_V; 6199 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6200 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6201 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6202 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6203 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6204 } 6205 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6206 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6207 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6208 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6209 if (pcbddc->deluxe_zerorows) { 6210 const PetscScalar *array; 6211 const PetscInt *idxs_V,*idxs_all; 6212 PetscInt i,n_V; 6213 6214 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6215 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6216 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6217 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6218 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6219 for (i=0;i<n_V;i++) { 6220 PetscScalar val; 6221 PetscInt idx; 6222 6223 idx = idxs_V[i]; 6224 val = array[idxs_all[idxs_V[i]]]; 6225 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6226 } 6227 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6228 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6229 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6230 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6231 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6232 } 6233 sub_schurs->S_Ej_all = S_new; 6234 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6235 if (sub_schurs->sum_S_Ej_all) { 6236 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6237 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6238 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6239 if (pcbddc->deluxe_zerorows) { 6240 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6241 } 6242 sub_schurs->sum_S_Ej_all = S_new; 6243 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6244 } 6245 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6246 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6247 } 6248 /* destroy any change of basis context in sub_schurs */ 6249 if (sub_schurs && sub_schurs->change) { 6250 PetscInt i; 6251 6252 for (i=0;i<sub_schurs->n_subs;i++) { 6253 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6254 } 6255 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6256 } 6257 } 6258 if (pcbddc->switch_static) { /* need to save the local change */ 6259 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6260 } else { 6261 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6262 } 6263 /* determine if any process has changed the pressures locally */ 6264 pcbddc->change_interior = pcbddc->benign_have_null; 6265 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6266 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6267 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6268 pcbddc->use_qr_single = qr_needed; 6269 } 6270 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6271 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6272 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6273 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6274 } else { 6275 Mat benign_global = NULL; 6276 if (pcbddc->benign_have_null) { 6277 Mat tmat; 6278 6279 pcbddc->change_interior = PETSC_TRUE; 6280 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6281 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6282 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6283 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6284 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6285 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6286 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6287 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6288 if (pcbddc->benign_change) { 6289 Mat M; 6290 6291 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6292 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6293 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6294 ierr = MatDestroy(&M);CHKERRQ(ierr); 6295 } else { 6296 Mat eye; 6297 PetscScalar *array; 6298 6299 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6300 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6301 for (i=0;i<pcis->n;i++) { 6302 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6303 } 6304 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6305 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6306 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6307 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6308 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6309 } 6310 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6311 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6312 } 6313 if (pcbddc->user_ChangeOfBasisMatrix) { 6314 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6315 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6316 } else if (pcbddc->benign_have_null) { 6317 pcbddc->ChangeOfBasisMatrix = benign_global; 6318 } 6319 } 6320 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6321 IS is_global; 6322 const PetscInt *gidxs; 6323 6324 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6325 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6326 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6327 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6328 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6329 } 6330 } 6331 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6332 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6333 } 6334 6335 if (!pcbddc->fake_change) { 6336 /* add pressure dofs to set of primal nodes for numbering purposes */ 6337 for (i=0;i<pcbddc->benign_n;i++) { 6338 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6339 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6340 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6341 pcbddc->local_primal_size_cc++; 6342 pcbddc->local_primal_size++; 6343 } 6344 6345 /* check if a new primal space has been introduced (also take into account benign trick) */ 6346 pcbddc->new_primal_space_local = PETSC_TRUE; 6347 if (olocal_primal_size == pcbddc->local_primal_size) { 6348 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6349 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6350 if (!pcbddc->new_primal_space_local) { 6351 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6352 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6353 } 6354 } 6355 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6356 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6357 } 6358 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6359 6360 /* flush dbg viewer */ 6361 if (pcbddc->dbg_flag) { 6362 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6363 } 6364 6365 /* free workspace */ 6366 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6367 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6368 if (!pcbddc->adaptive_selection) { 6369 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6370 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6371 } else { 6372 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6373 pcbddc->adaptive_constraints_idxs_ptr, 6374 pcbddc->adaptive_constraints_data_ptr, 6375 pcbddc->adaptive_constraints_idxs, 6376 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6377 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6378 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6379 } 6380 PetscFunctionReturn(0); 6381 } 6382 6383 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6384 { 6385 ISLocalToGlobalMapping map; 6386 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6387 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6388 PetscInt i,N; 6389 PetscBool rcsr = PETSC_FALSE; 6390 PetscErrorCode ierr; 6391 6392 PetscFunctionBegin; 6393 if (pcbddc->recompute_topography) { 6394 pcbddc->graphanalyzed = PETSC_FALSE; 6395 /* Reset previously computed graph */ 6396 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6397 /* Init local Graph struct */ 6398 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6399 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6400 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6401 6402 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6403 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6404 } 6405 /* Check validity of the csr graph passed in by the user */ 6406 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6407 6408 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6409 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6410 PetscInt *xadj,*adjncy; 6411 PetscInt nvtxs; 6412 PetscBool flg_row=PETSC_FALSE; 6413 6414 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6415 if (flg_row) { 6416 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6417 pcbddc->computed_rowadj = PETSC_TRUE; 6418 } 6419 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6420 rcsr = PETSC_TRUE; 6421 } 6422 if (pcbddc->dbg_flag) { 6423 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6424 } 6425 6426 /* Setup of Graph */ 6427 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6428 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6429 6430 /* attach info on disconnected subdomains if present */ 6431 if (pcbddc->n_local_subs) { 6432 PetscInt *local_subs; 6433 6434 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6435 for (i=0;i<pcbddc->n_local_subs;i++) { 6436 const PetscInt *idxs; 6437 PetscInt nl,j; 6438 6439 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6440 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6441 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6442 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6443 } 6444 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6445 pcbddc->mat_graph->local_subs = local_subs; 6446 } 6447 } 6448 6449 if (!pcbddc->graphanalyzed) { 6450 /* Graph's connected components analysis */ 6451 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6452 pcbddc->graphanalyzed = PETSC_TRUE; 6453 } 6454 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6455 PetscFunctionReturn(0); 6456 } 6457 6458 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6459 { 6460 PetscInt i,j; 6461 PetscScalar *alphas; 6462 PetscErrorCode ierr; 6463 6464 PetscFunctionBegin; 6465 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6466 for (i=0;i<n;i++) { 6467 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6468 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6469 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6470 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6471 } 6472 ierr = PetscFree(alphas);CHKERRQ(ierr); 6473 PetscFunctionReturn(0); 6474 } 6475 6476 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6477 { 6478 Mat A; 6479 PetscInt n_neighs,*neighs,*n_shared,**shared; 6480 PetscMPIInt size,rank,color; 6481 PetscInt *xadj,*adjncy; 6482 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6483 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 6484 PetscInt void_procs,*procs_candidates = NULL; 6485 PetscInt xadj_count,*count; 6486 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6487 PetscSubcomm psubcomm; 6488 MPI_Comm subcomm; 6489 PetscErrorCode ierr; 6490 6491 PetscFunctionBegin; 6492 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6493 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6494 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 6495 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6496 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6497 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6498 6499 if (have_void) *have_void = PETSC_FALSE; 6500 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6501 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6502 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6503 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6504 im_active = !!n; 6505 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6506 void_procs = size - active_procs; 6507 /* get ranks of of non-active processes in mat communicator */ 6508 if (void_procs) { 6509 PetscInt ncand; 6510 6511 if (have_void) *have_void = PETSC_TRUE; 6512 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6513 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6514 for (i=0,ncand=0;i<size;i++) { 6515 if (!procs_candidates[i]) { 6516 procs_candidates[ncand++] = i; 6517 } 6518 } 6519 /* force n_subdomains to be not greater that the number of non-active processes */ 6520 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6521 } 6522 6523 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 6524 number of subdomains requested 1 -> send to master or first candidate in voids */ 6525 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 6526 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 6527 PetscInt issize,isidx,dest; 6528 if (*n_subdomains == 1) dest = 0; 6529 else dest = rank; 6530 if (im_active) { 6531 issize = 1; 6532 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6533 isidx = procs_candidates[dest]; 6534 } else { 6535 isidx = dest; 6536 } 6537 } else { 6538 issize = 0; 6539 isidx = -1; 6540 } 6541 if (*n_subdomains != 1) *n_subdomains = active_procs; 6542 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6543 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6544 PetscFunctionReturn(0); 6545 } 6546 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6547 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6548 threshold = PetscMax(threshold,2); 6549 6550 /* Get info on mapping */ 6551 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6552 6553 /* build local CSR graph of subdomains' connectivity */ 6554 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6555 xadj[0] = 0; 6556 xadj[1] = PetscMax(n_neighs-1,0); 6557 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6558 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6559 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 6560 for (i=1;i<n_neighs;i++) 6561 for (j=0;j<n_shared[i];j++) 6562 count[shared[i][j]] += 1; 6563 6564 xadj_count = 0; 6565 for (i=1;i<n_neighs;i++) { 6566 for (j=0;j<n_shared[i];j++) { 6567 if (count[shared[i][j]] < threshold) { 6568 adjncy[xadj_count] = neighs[i]; 6569 adjncy_wgt[xadj_count] = n_shared[i]; 6570 xadj_count++; 6571 break; 6572 } 6573 } 6574 } 6575 xadj[1] = xadj_count; 6576 ierr = PetscFree(count);CHKERRQ(ierr); 6577 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6578 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6579 6580 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6581 6582 /* Restrict work on active processes only */ 6583 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6584 if (void_procs) { 6585 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6586 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6587 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6588 subcomm = PetscSubcommChild(psubcomm); 6589 } else { 6590 psubcomm = NULL; 6591 subcomm = PetscObjectComm((PetscObject)mat); 6592 } 6593 6594 v_wgt = NULL; 6595 if (!color) { 6596 ierr = PetscFree(xadj);CHKERRQ(ierr); 6597 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6598 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6599 } else { 6600 Mat subdomain_adj; 6601 IS new_ranks,new_ranks_contig; 6602 MatPartitioning partitioner; 6603 PetscInt rstart=0,rend=0; 6604 PetscInt *is_indices,*oldranks; 6605 PetscMPIInt size; 6606 PetscBool aggregate; 6607 6608 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6609 if (void_procs) { 6610 PetscInt prank = rank; 6611 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6612 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6613 for (i=0;i<xadj[1];i++) { 6614 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6615 } 6616 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6617 } else { 6618 oldranks = NULL; 6619 } 6620 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6621 if (aggregate) { /* TODO: all this part could be made more efficient */ 6622 PetscInt lrows,row,ncols,*cols; 6623 PetscMPIInt nrank; 6624 PetscScalar *vals; 6625 6626 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6627 lrows = 0; 6628 if (nrank<redprocs) { 6629 lrows = size/redprocs; 6630 if (nrank<size%redprocs) lrows++; 6631 } 6632 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6633 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6634 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6635 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6636 row = nrank; 6637 ncols = xadj[1]-xadj[0]; 6638 cols = adjncy; 6639 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6640 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6641 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6642 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6643 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6644 ierr = PetscFree(xadj);CHKERRQ(ierr); 6645 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6646 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6647 ierr = PetscFree(vals);CHKERRQ(ierr); 6648 if (use_vwgt) { 6649 Vec v; 6650 const PetscScalar *array; 6651 PetscInt nl; 6652 6653 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6654 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 6655 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6656 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6657 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6658 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6659 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6660 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6661 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6662 ierr = VecDestroy(&v);CHKERRQ(ierr); 6663 } 6664 } else { 6665 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6666 if (use_vwgt) { 6667 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6668 v_wgt[0] = n; 6669 } 6670 } 6671 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6672 6673 /* Partition */ 6674 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6675 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6676 if (v_wgt) { 6677 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6678 } 6679 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6680 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6681 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6682 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6683 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6684 6685 /* renumber new_ranks to avoid "holes" in new set of processors */ 6686 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6687 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6688 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6689 if (!aggregate) { 6690 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6691 #if defined(PETSC_USE_DEBUG) 6692 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6693 #endif 6694 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6695 } else if (oldranks) { 6696 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6697 } else { 6698 ranks_send_to_idx[0] = is_indices[0]; 6699 } 6700 } else { 6701 PetscInt idxs[1]; 6702 PetscMPIInt tag; 6703 MPI_Request *reqs; 6704 6705 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6706 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6707 for (i=rstart;i<rend;i++) { 6708 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6709 } 6710 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6711 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6712 ierr = PetscFree(reqs);CHKERRQ(ierr); 6713 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6714 #if defined(PETSC_USE_DEBUG) 6715 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6716 #endif 6717 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6718 } else if (oldranks) { 6719 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6720 } else { 6721 ranks_send_to_idx[0] = idxs[0]; 6722 } 6723 } 6724 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6725 /* clean up */ 6726 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6727 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6728 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6729 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6730 } 6731 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6732 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6733 6734 /* assemble parallel IS for sends */ 6735 i = 1; 6736 if (!color) i=0; 6737 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6738 PetscFunctionReturn(0); 6739 } 6740 6741 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6742 6743 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6744 { 6745 Mat local_mat; 6746 IS is_sends_internal; 6747 PetscInt rows,cols,new_local_rows; 6748 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6749 PetscBool ismatis,isdense,newisdense,destroy_mat; 6750 ISLocalToGlobalMapping l2gmap; 6751 PetscInt* l2gmap_indices; 6752 const PetscInt* is_indices; 6753 MatType new_local_type; 6754 /* buffers */ 6755 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6756 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6757 PetscInt *recv_buffer_idxs_local; 6758 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6759 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6760 /* MPI */ 6761 MPI_Comm comm,comm_n; 6762 PetscSubcomm subcomm; 6763 PetscMPIInt n_sends,n_recvs,commsize; 6764 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6765 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6766 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6767 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6768 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6769 PetscErrorCode ierr; 6770 6771 PetscFunctionBegin; 6772 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6773 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6774 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 6775 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6776 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6777 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6778 PetscValidLogicalCollectiveBool(mat,reuse,6); 6779 PetscValidLogicalCollectiveInt(mat,nis,8); 6780 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6781 if (nvecs) { 6782 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6783 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6784 } 6785 /* further checks */ 6786 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6787 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6788 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6789 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6790 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6791 if (reuse && *mat_n) { 6792 PetscInt mrows,mcols,mnrows,mncols; 6793 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6794 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6795 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6796 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6797 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6798 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6799 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6800 } 6801 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6802 PetscValidLogicalCollectiveInt(mat,bs,0); 6803 6804 /* prepare IS for sending if not provided */ 6805 if (!is_sends) { 6806 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6807 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6808 } else { 6809 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6810 is_sends_internal = is_sends; 6811 } 6812 6813 /* get comm */ 6814 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6815 6816 /* compute number of sends */ 6817 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6818 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6819 6820 /* compute number of receives */ 6821 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6822 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6823 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6824 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6825 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6826 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6827 ierr = PetscFree(iflags);CHKERRQ(ierr); 6828 6829 /* restrict comm if requested */ 6830 subcomm = 0; 6831 destroy_mat = PETSC_FALSE; 6832 if (restrict_comm) { 6833 PetscMPIInt color,subcommsize; 6834 6835 color = 0; 6836 if (restrict_full) { 6837 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6838 } else { 6839 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6840 } 6841 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6842 subcommsize = commsize - subcommsize; 6843 /* check if reuse has been requested */ 6844 if (reuse) { 6845 if (*mat_n) { 6846 PetscMPIInt subcommsize2; 6847 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6848 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6849 comm_n = PetscObjectComm((PetscObject)*mat_n); 6850 } else { 6851 comm_n = PETSC_COMM_SELF; 6852 } 6853 } else { /* MAT_INITIAL_MATRIX */ 6854 PetscMPIInt rank; 6855 6856 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6857 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6858 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6859 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6860 comm_n = PetscSubcommChild(subcomm); 6861 } 6862 /* flag to destroy *mat_n if not significative */ 6863 if (color) destroy_mat = PETSC_TRUE; 6864 } else { 6865 comm_n = comm; 6866 } 6867 6868 /* prepare send/receive buffers */ 6869 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6870 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6871 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6872 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6873 if (nis) { 6874 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6875 } 6876 6877 /* Get data from local matrices */ 6878 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6879 /* TODO: See below some guidelines on how to prepare the local buffers */ 6880 /* 6881 send_buffer_vals should contain the raw values of the local matrix 6882 send_buffer_idxs should contain: 6883 - MatType_PRIVATE type 6884 - PetscInt size_of_l2gmap 6885 - PetscInt global_row_indices[size_of_l2gmap] 6886 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6887 */ 6888 else { 6889 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6890 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6891 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6892 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6893 send_buffer_idxs[1] = i; 6894 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6895 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6896 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6897 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6898 for (i=0;i<n_sends;i++) { 6899 ilengths_vals[is_indices[i]] = len*len; 6900 ilengths_idxs[is_indices[i]] = len+2; 6901 } 6902 } 6903 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6904 /* additional is (if any) */ 6905 if (nis) { 6906 PetscMPIInt psum; 6907 PetscInt j; 6908 for (j=0,psum=0;j<nis;j++) { 6909 PetscInt plen; 6910 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6911 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6912 psum += len+1; /* indices + lenght */ 6913 } 6914 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6915 for (j=0,psum=0;j<nis;j++) { 6916 PetscInt plen; 6917 const PetscInt *is_array_idxs; 6918 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6919 send_buffer_idxs_is[psum] = plen; 6920 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6921 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6922 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6923 psum += plen+1; /* indices + lenght */ 6924 } 6925 for (i=0;i<n_sends;i++) { 6926 ilengths_idxs_is[is_indices[i]] = psum; 6927 } 6928 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6929 } 6930 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 6931 6932 buf_size_idxs = 0; 6933 buf_size_vals = 0; 6934 buf_size_idxs_is = 0; 6935 buf_size_vecs = 0; 6936 for (i=0;i<n_recvs;i++) { 6937 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6938 buf_size_vals += (PetscInt)olengths_vals[i]; 6939 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6940 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6941 } 6942 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6943 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6944 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6945 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6946 6947 /* get new tags for clean communications */ 6948 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6949 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6950 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6951 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6952 6953 /* allocate for requests */ 6954 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6955 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6956 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6957 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6958 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6959 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6960 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6961 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6962 6963 /* communications */ 6964 ptr_idxs = recv_buffer_idxs; 6965 ptr_vals = recv_buffer_vals; 6966 ptr_idxs_is = recv_buffer_idxs_is; 6967 ptr_vecs = recv_buffer_vecs; 6968 for (i=0;i<n_recvs;i++) { 6969 source_dest = onodes[i]; 6970 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6971 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6972 ptr_idxs += olengths_idxs[i]; 6973 ptr_vals += olengths_vals[i]; 6974 if (nis) { 6975 source_dest = onodes_is[i]; 6976 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6977 ptr_idxs_is += olengths_idxs_is[i]; 6978 } 6979 if (nvecs) { 6980 source_dest = onodes[i]; 6981 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6982 ptr_vecs += olengths_idxs[i]-2; 6983 } 6984 } 6985 for (i=0;i<n_sends;i++) { 6986 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6987 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6988 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6989 if (nis) { 6990 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6991 } 6992 if (nvecs) { 6993 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6994 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6995 } 6996 } 6997 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6998 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6999 7000 /* assemble new l2g map */ 7001 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7002 ptr_idxs = recv_buffer_idxs; 7003 new_local_rows = 0; 7004 for (i=0;i<n_recvs;i++) { 7005 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7006 ptr_idxs += olengths_idxs[i]; 7007 } 7008 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7009 ptr_idxs = recv_buffer_idxs; 7010 new_local_rows = 0; 7011 for (i=0;i<n_recvs;i++) { 7012 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 7013 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7014 ptr_idxs += olengths_idxs[i]; 7015 } 7016 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7017 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7018 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7019 7020 /* infer new local matrix type from received local matrices type */ 7021 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7022 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7023 if (n_recvs) { 7024 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7025 ptr_idxs = recv_buffer_idxs; 7026 for (i=0;i<n_recvs;i++) { 7027 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7028 new_local_type_private = MATAIJ_PRIVATE; 7029 break; 7030 } 7031 ptr_idxs += olengths_idxs[i]; 7032 } 7033 switch (new_local_type_private) { 7034 case MATDENSE_PRIVATE: 7035 new_local_type = MATSEQAIJ; 7036 bs = 1; 7037 break; 7038 case MATAIJ_PRIVATE: 7039 new_local_type = MATSEQAIJ; 7040 bs = 1; 7041 break; 7042 case MATBAIJ_PRIVATE: 7043 new_local_type = MATSEQBAIJ; 7044 break; 7045 case MATSBAIJ_PRIVATE: 7046 new_local_type = MATSEQSBAIJ; 7047 break; 7048 default: 7049 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7050 break; 7051 } 7052 } else { /* by default, new_local_type is seqaij */ 7053 new_local_type = MATSEQAIJ; 7054 bs = 1; 7055 } 7056 7057 /* create MATIS object if needed */ 7058 if (!reuse) { 7059 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7060 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7061 } else { 7062 /* it also destroys the local matrices */ 7063 if (*mat_n) { 7064 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7065 } else { /* this is a fake object */ 7066 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7067 } 7068 } 7069 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7070 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7071 7072 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7073 7074 /* Global to local map of received indices */ 7075 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7076 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7077 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7078 7079 /* restore attributes -> type of incoming data and its size */ 7080 buf_size_idxs = 0; 7081 for (i=0;i<n_recvs;i++) { 7082 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7083 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7084 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7085 } 7086 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7087 7088 /* set preallocation */ 7089 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7090 if (!newisdense) { 7091 PetscInt *new_local_nnz=0; 7092 7093 ptr_idxs = recv_buffer_idxs_local; 7094 if (n_recvs) { 7095 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7096 } 7097 for (i=0;i<n_recvs;i++) { 7098 PetscInt j; 7099 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7100 for (j=0;j<*(ptr_idxs+1);j++) { 7101 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7102 } 7103 } else { 7104 /* TODO */ 7105 } 7106 ptr_idxs += olengths_idxs[i]; 7107 } 7108 if (new_local_nnz) { 7109 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7110 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7111 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7112 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7113 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7114 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7115 } else { 7116 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7117 } 7118 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7119 } else { 7120 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7121 } 7122 7123 /* set values */ 7124 ptr_vals = recv_buffer_vals; 7125 ptr_idxs = recv_buffer_idxs_local; 7126 for (i=0;i<n_recvs;i++) { 7127 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7128 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7129 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7130 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7131 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7132 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7133 } else { 7134 /* TODO */ 7135 } 7136 ptr_idxs += olengths_idxs[i]; 7137 ptr_vals += olengths_vals[i]; 7138 } 7139 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7140 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7141 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7142 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7143 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7144 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7145 7146 #if 0 7147 if (!restrict_comm) { /* check */ 7148 Vec lvec,rvec; 7149 PetscReal infty_error; 7150 7151 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7152 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7153 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7154 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7155 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7156 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7157 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7158 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7159 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7160 } 7161 #endif 7162 7163 /* assemble new additional is (if any) */ 7164 if (nis) { 7165 PetscInt **temp_idxs,*count_is,j,psum; 7166 7167 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7168 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7169 ptr_idxs = recv_buffer_idxs_is; 7170 psum = 0; 7171 for (i=0;i<n_recvs;i++) { 7172 for (j=0;j<nis;j++) { 7173 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7174 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7175 psum += plen; 7176 ptr_idxs += plen+1; /* shift pointer to received data */ 7177 } 7178 } 7179 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7180 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7181 for (i=1;i<nis;i++) { 7182 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7183 } 7184 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7185 ptr_idxs = recv_buffer_idxs_is; 7186 for (i=0;i<n_recvs;i++) { 7187 for (j=0;j<nis;j++) { 7188 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7189 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7190 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7191 ptr_idxs += plen+1; /* shift pointer to received data */ 7192 } 7193 } 7194 for (i=0;i<nis;i++) { 7195 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7196 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7197 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7198 } 7199 ierr = PetscFree(count_is);CHKERRQ(ierr); 7200 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7201 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7202 } 7203 /* free workspace */ 7204 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7205 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7206 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7207 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7208 if (isdense) { 7209 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7210 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7211 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7212 } else { 7213 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7214 } 7215 if (nis) { 7216 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7217 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7218 } 7219 7220 if (nvecs) { 7221 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7222 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7223 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7224 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7225 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7226 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7227 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7228 /* set values */ 7229 ptr_vals = recv_buffer_vecs; 7230 ptr_idxs = recv_buffer_idxs_local; 7231 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7232 for (i=0;i<n_recvs;i++) { 7233 PetscInt j; 7234 for (j=0;j<*(ptr_idxs+1);j++) { 7235 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7236 } 7237 ptr_idxs += olengths_idxs[i]; 7238 ptr_vals += olengths_idxs[i]-2; 7239 } 7240 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7241 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7242 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7243 } 7244 7245 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7246 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7247 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7248 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7249 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7250 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7251 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7252 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7253 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7254 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7255 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7256 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7257 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7258 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7259 ierr = PetscFree(onodes);CHKERRQ(ierr); 7260 if (nis) { 7261 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7262 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7263 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7264 } 7265 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7266 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7267 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7268 for (i=0;i<nis;i++) { 7269 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7270 } 7271 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7272 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7273 } 7274 *mat_n = NULL; 7275 } 7276 PetscFunctionReturn(0); 7277 } 7278 7279 /* temporary hack into ksp private data structure */ 7280 #include <petsc/private/kspimpl.h> 7281 7282 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7283 { 7284 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7285 PC_IS *pcis = (PC_IS*)pc->data; 7286 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7287 Mat coarsedivudotp = NULL; 7288 Mat coarseG,t_coarse_mat_is; 7289 MatNullSpace CoarseNullSpace = NULL; 7290 ISLocalToGlobalMapping coarse_islg; 7291 IS coarse_is,*isarray; 7292 PetscInt i,im_active=-1,active_procs=-1; 7293 PetscInt nis,nisdofs,nisneu,nisvert; 7294 PC pc_temp; 7295 PCType coarse_pc_type; 7296 KSPType coarse_ksp_type; 7297 PetscBool multilevel_requested,multilevel_allowed; 7298 PetscBool isredundant,isbddc,isnn,coarse_reuse; 7299 PetscInt ncoarse,nedcfield; 7300 PetscBool compute_vecs = PETSC_FALSE; 7301 PetscScalar *array; 7302 MatReuse coarse_mat_reuse; 7303 PetscBool restr, full_restr, have_void; 7304 PetscMPIInt commsize; 7305 PetscErrorCode ierr; 7306 7307 PetscFunctionBegin; 7308 /* Assign global numbering to coarse dofs */ 7309 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7310 PetscInt ocoarse_size; 7311 compute_vecs = PETSC_TRUE; 7312 7313 pcbddc->new_primal_space = PETSC_TRUE; 7314 ocoarse_size = pcbddc->coarse_size; 7315 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7316 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7317 /* see if we can avoid some work */ 7318 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7319 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7320 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7321 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7322 coarse_reuse = PETSC_FALSE; 7323 } else { /* we can safely reuse already computed coarse matrix */ 7324 coarse_reuse = PETSC_TRUE; 7325 } 7326 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7327 coarse_reuse = PETSC_FALSE; 7328 } 7329 /* reset any subassembling information */ 7330 if (!coarse_reuse || pcbddc->recompute_topography) { 7331 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7332 } 7333 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7334 coarse_reuse = PETSC_TRUE; 7335 } 7336 /* assemble coarse matrix */ 7337 if (coarse_reuse && pcbddc->coarse_ksp) { 7338 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7339 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7340 coarse_mat_reuse = MAT_REUSE_MATRIX; 7341 } else { 7342 coarse_mat = NULL; 7343 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7344 } 7345 7346 /* creates temporary l2gmap and IS for coarse indexes */ 7347 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7348 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7349 7350 /* creates temporary MATIS object for coarse matrix */ 7351 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7352 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7353 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7354 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7355 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7356 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7357 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7358 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7359 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7360 7361 /* count "active" (i.e. with positive local size) and "void" processes */ 7362 im_active = !!(pcis->n); 7363 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7364 7365 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7366 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7367 /* full_restr : just use the receivers from the subassembling pattern */ 7368 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);CHKERRQ(ierr); 7369 coarse_mat_is = NULL; 7370 multilevel_allowed = PETSC_FALSE; 7371 multilevel_requested = PETSC_FALSE; 7372 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7373 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7374 if (multilevel_requested) { 7375 ncoarse = active_procs/pcbddc->coarsening_ratio; 7376 restr = PETSC_FALSE; 7377 full_restr = PETSC_FALSE; 7378 } else { 7379 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7380 restr = PETSC_TRUE; 7381 full_restr = PETSC_TRUE; 7382 } 7383 if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7384 ncoarse = PetscMax(1,ncoarse); 7385 if (!pcbddc->coarse_subassembling) { 7386 if (pcbddc->coarsening_ratio > 1) { 7387 if (multilevel_requested) { 7388 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7389 } else { 7390 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7391 } 7392 } else { 7393 PetscMPIInt rank; 7394 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7395 have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE; 7396 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7397 } 7398 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7399 PetscInt psum; 7400 if (pcbddc->coarse_ksp) psum = 1; 7401 else psum = 0; 7402 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7403 if (ncoarse < commsize) have_void = PETSC_TRUE; 7404 } 7405 /* determine if we can go multilevel */ 7406 if (multilevel_requested) { 7407 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7408 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7409 } 7410 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7411 7412 /* dump subassembling pattern */ 7413 if (pcbddc->dbg_flag && multilevel_allowed) { 7414 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7415 } 7416 7417 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7418 nedcfield = -1; 7419 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7420 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7421 const PetscInt *idxs; 7422 ISLocalToGlobalMapping tmap; 7423 7424 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7425 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7426 /* allocate space for temporary storage */ 7427 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7428 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7429 /* allocate for IS array */ 7430 nisdofs = pcbddc->n_ISForDofsLocal; 7431 if (pcbddc->nedclocal) { 7432 if (pcbddc->nedfield > -1) { 7433 nedcfield = pcbddc->nedfield; 7434 } else { 7435 nedcfield = 0; 7436 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7437 nisdofs = 1; 7438 } 7439 } 7440 nisneu = !!pcbddc->NeumannBoundariesLocal; 7441 nisvert = 0; /* nisvert is not used */ 7442 nis = nisdofs + nisneu + nisvert; 7443 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7444 /* dofs splitting */ 7445 for (i=0;i<nisdofs;i++) { 7446 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7447 if (nedcfield != i) { 7448 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7449 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7450 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7451 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7452 } else { 7453 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7454 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7455 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7456 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7457 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7458 } 7459 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7460 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7461 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7462 } 7463 /* neumann boundaries */ 7464 if (pcbddc->NeumannBoundariesLocal) { 7465 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7466 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7467 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7468 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7469 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7470 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7471 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7472 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7473 } 7474 /* free memory */ 7475 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7476 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7477 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7478 } else { 7479 nis = 0; 7480 nisdofs = 0; 7481 nisneu = 0; 7482 nisvert = 0; 7483 isarray = NULL; 7484 } 7485 /* destroy no longer needed map */ 7486 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7487 7488 /* subassemble */ 7489 if (multilevel_allowed) { 7490 Vec vp[1]; 7491 PetscInt nvecs = 0; 7492 PetscBool reuse,reuser; 7493 7494 if (coarse_mat) reuse = PETSC_TRUE; 7495 else reuse = PETSC_FALSE; 7496 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7497 vp[0] = NULL; 7498 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7499 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7500 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7501 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7502 nvecs = 1; 7503 7504 if (pcbddc->divudotp) { 7505 Mat B,loc_divudotp; 7506 Vec v,p; 7507 IS dummy; 7508 PetscInt np; 7509 7510 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7511 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7512 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7513 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7514 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7515 ierr = VecSet(p,1.);CHKERRQ(ierr); 7516 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7517 ierr = VecDestroy(&p);CHKERRQ(ierr); 7518 ierr = MatDestroy(&B);CHKERRQ(ierr); 7519 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7520 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7521 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7522 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7523 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7524 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7525 ierr = VecDestroy(&v);CHKERRQ(ierr); 7526 } 7527 } 7528 if (reuser) { 7529 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7530 } else { 7531 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7532 } 7533 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7534 PetscScalar *arraym,*arrayv; 7535 PetscInt nl; 7536 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7537 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7538 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7539 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7540 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7541 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7542 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7543 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7544 } else { 7545 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7546 } 7547 } else { 7548 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7549 } 7550 if (coarse_mat_is || coarse_mat) { 7551 PetscMPIInt size; 7552 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7553 if (!multilevel_allowed) { 7554 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7555 } else { 7556 Mat A; 7557 7558 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7559 if (coarse_mat_is) { 7560 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7561 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7562 coarse_mat = coarse_mat_is; 7563 } 7564 /* be sure we don't have MatSeqDENSE as local mat */ 7565 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7566 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7567 } 7568 } 7569 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7570 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7571 7572 /* create local to global scatters for coarse problem */ 7573 if (compute_vecs) { 7574 PetscInt lrows; 7575 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7576 if (coarse_mat) { 7577 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7578 } else { 7579 lrows = 0; 7580 } 7581 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7582 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7583 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7584 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7585 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7586 } 7587 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7588 7589 /* set defaults for coarse KSP and PC */ 7590 if (multilevel_allowed) { 7591 coarse_ksp_type = KSPRICHARDSON; 7592 coarse_pc_type = PCBDDC; 7593 } else { 7594 coarse_ksp_type = KSPPREONLY; 7595 coarse_pc_type = PCREDUNDANT; 7596 } 7597 7598 /* print some info if requested */ 7599 if (pcbddc->dbg_flag) { 7600 if (!multilevel_allowed) { 7601 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7602 if (multilevel_requested) { 7603 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7604 } else if (pcbddc->max_levels) { 7605 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7606 } 7607 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7608 } 7609 } 7610 7611 /* communicate coarse discrete gradient */ 7612 coarseG = NULL; 7613 if (pcbddc->nedcG && multilevel_allowed) { 7614 MPI_Comm ccomm; 7615 if (coarse_mat) { 7616 ccomm = PetscObjectComm((PetscObject)coarse_mat); 7617 } else { 7618 ccomm = MPI_COMM_NULL; 7619 } 7620 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 7621 } 7622 7623 /* create the coarse KSP object only once with defaults */ 7624 if (coarse_mat) { 7625 PetscViewer dbg_viewer = NULL; 7626 if (pcbddc->dbg_flag) { 7627 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7628 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7629 } 7630 if (!pcbddc->coarse_ksp) { 7631 char prefix[256],str_level[16]; 7632 size_t len; 7633 7634 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7635 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7636 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7637 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7638 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7639 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7640 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7641 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7642 /* TODO is this logic correct? should check for coarse_mat type */ 7643 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7644 /* prefix */ 7645 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7646 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7647 if (!pcbddc->current_level) { 7648 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7649 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7650 } else { 7651 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7652 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7653 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7654 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7655 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7656 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7657 } 7658 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7659 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7660 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7661 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7662 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7663 /* allow user customization */ 7664 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7665 } 7666 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7667 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7668 if (nisdofs) { 7669 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7670 for (i=0;i<nisdofs;i++) { 7671 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7672 } 7673 } 7674 if (nisneu) { 7675 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7676 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7677 } 7678 if (nisvert) { 7679 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7680 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7681 } 7682 if (coarseG) { 7683 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 7684 } 7685 7686 /* get some info after set from options */ 7687 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7688 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7689 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7690 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 7691 if (isbddc && !multilevel_allowed) { 7692 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7693 isbddc = PETSC_FALSE; 7694 } 7695 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 7696 if (multilevel_requested && !isbddc && !isnn) { 7697 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 7698 isbddc = PETSC_TRUE; 7699 isnn = PETSC_FALSE; 7700 } 7701 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7702 if (isredundant) { 7703 KSP inner_ksp; 7704 PC inner_pc; 7705 7706 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7707 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7708 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7709 } 7710 7711 /* parameters which miss an API */ 7712 if (isbddc) { 7713 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7714 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7715 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7716 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7717 if (pcbddc_coarse->benign_saddle_point) { 7718 Mat coarsedivudotp_is; 7719 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7720 IS row,col; 7721 const PetscInt *gidxs; 7722 PetscInt n,st,M,N; 7723 7724 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7725 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7726 st = st-n; 7727 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7728 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7729 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7730 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7731 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7732 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7733 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7734 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7735 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7736 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7737 ierr = ISDestroy(&row);CHKERRQ(ierr); 7738 ierr = ISDestroy(&col);CHKERRQ(ierr); 7739 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7740 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7741 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7742 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7743 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7744 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7745 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7746 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7747 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7748 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7749 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7750 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7751 } 7752 } 7753 7754 /* propagate symmetry info of coarse matrix */ 7755 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7756 if (pc->pmat->symmetric_set) { 7757 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7758 } 7759 if (pc->pmat->hermitian_set) { 7760 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7761 } 7762 if (pc->pmat->spd_set) { 7763 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7764 } 7765 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7766 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7767 } 7768 /* set operators */ 7769 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7770 if (pcbddc->dbg_flag) { 7771 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7772 } 7773 } 7774 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 7775 ierr = PetscFree(isarray);CHKERRQ(ierr); 7776 #if 0 7777 { 7778 PetscViewer viewer; 7779 char filename[256]; 7780 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7781 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7782 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7783 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7784 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7785 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7786 } 7787 #endif 7788 7789 if (pcbddc->coarse_ksp) { 7790 Vec crhs,csol; 7791 7792 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7793 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7794 if (!csol) { 7795 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7796 } 7797 if (!crhs) { 7798 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7799 } 7800 } 7801 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7802 7803 /* compute null space for coarse solver if the benign trick has been requested */ 7804 if (pcbddc->benign_null) { 7805 7806 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7807 for (i=0;i<pcbddc->benign_n;i++) { 7808 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7809 } 7810 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7811 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7812 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7813 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7814 if (coarse_mat) { 7815 Vec nullv; 7816 PetscScalar *array,*array2; 7817 PetscInt nl; 7818 7819 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7820 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7821 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7822 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7823 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7824 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7825 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7826 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7827 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7828 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7829 } 7830 } 7831 7832 if (pcbddc->coarse_ksp) { 7833 PetscBool ispreonly; 7834 7835 if (CoarseNullSpace) { 7836 PetscBool isnull; 7837 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7838 if (isnull) { 7839 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7840 } 7841 /* TODO: add local nullspaces (if any) */ 7842 } 7843 /* setup coarse ksp */ 7844 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7845 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7846 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7847 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7848 KSP check_ksp; 7849 KSPType check_ksp_type; 7850 PC check_pc; 7851 Vec check_vec,coarse_vec; 7852 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7853 PetscInt its; 7854 PetscBool compute_eigs; 7855 PetscReal *eigs_r,*eigs_c; 7856 PetscInt neigs; 7857 const char *prefix; 7858 7859 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7860 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7861 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7862 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7863 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7864 /* prevent from setup unneeded object */ 7865 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7866 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7867 if (ispreonly) { 7868 check_ksp_type = KSPPREONLY; 7869 compute_eigs = PETSC_FALSE; 7870 } else { 7871 check_ksp_type = KSPGMRES; 7872 compute_eigs = PETSC_TRUE; 7873 } 7874 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7875 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7876 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7877 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7878 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7879 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7880 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7881 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7882 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7883 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7884 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7885 /* create random vec */ 7886 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7887 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7888 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7889 /* solve coarse problem */ 7890 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7891 /* set eigenvalue estimation if preonly has not been requested */ 7892 if (compute_eigs) { 7893 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7894 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7895 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7896 if (neigs) { 7897 lambda_max = eigs_r[neigs-1]; 7898 lambda_min = eigs_r[0]; 7899 if (pcbddc->use_coarse_estimates) { 7900 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7901 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7902 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7903 } 7904 } 7905 } 7906 } 7907 7908 /* check coarse problem residual error */ 7909 if (pcbddc->dbg_flag) { 7910 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7911 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7912 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7913 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7914 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7915 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7916 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7917 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7918 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7919 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7920 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7921 if (CoarseNullSpace) { 7922 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7923 } 7924 if (compute_eigs) { 7925 PetscReal lambda_max_s,lambda_min_s; 7926 KSPConvergedReason reason; 7927 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7928 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7929 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 7930 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7931 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7932 for (i=0;i<neigs;i++) { 7933 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7934 } 7935 } 7936 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7937 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7938 } 7939 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7940 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7941 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7942 if (compute_eigs) { 7943 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7944 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7945 } 7946 } 7947 } 7948 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7949 /* print additional info */ 7950 if (pcbddc->dbg_flag) { 7951 /* waits until all processes reaches this point */ 7952 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7953 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7954 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7955 } 7956 7957 /* free memory */ 7958 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7959 PetscFunctionReturn(0); 7960 } 7961 7962 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7963 { 7964 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7965 PC_IS* pcis = (PC_IS*)pc->data; 7966 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7967 IS subset,subset_mult,subset_n; 7968 PetscInt local_size,coarse_size=0; 7969 PetscInt *local_primal_indices=NULL; 7970 const PetscInt *t_local_primal_indices; 7971 PetscErrorCode ierr; 7972 7973 PetscFunctionBegin; 7974 /* Compute global number of coarse dofs */ 7975 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7976 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7977 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7978 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7979 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7980 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7981 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7982 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7983 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7984 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7985 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7986 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7987 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7988 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7989 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7990 7991 /* check numbering */ 7992 if (pcbddc->dbg_flag) { 7993 PetscScalar coarsesum,*array,*array2; 7994 PetscInt i; 7995 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7996 7997 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7998 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7999 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8000 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8001 /* counter */ 8002 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8003 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8004 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8005 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8006 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8007 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8008 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8009 for (i=0;i<pcbddc->local_primal_size;i++) { 8010 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8011 } 8012 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8013 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8014 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8015 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8016 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8017 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8018 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8019 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8020 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8021 for (i=0;i<pcis->n;i++) { 8022 if (array[i] != 0.0 && array[i] != array2[i]) { 8023 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8024 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8025 set_error = PETSC_TRUE; 8026 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8027 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8028 } 8029 } 8030 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8031 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8032 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8033 for (i=0;i<pcis->n;i++) { 8034 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8035 } 8036 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8037 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8038 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8039 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8040 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8041 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8042 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8043 PetscInt *gidxs; 8044 8045 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8046 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8047 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8048 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8049 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8050 for (i=0;i<pcbddc->local_primal_size;i++) { 8051 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8052 } 8053 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8054 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8055 } 8056 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8057 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8058 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8059 } 8060 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 8061 /* get back data */ 8062 *coarse_size_n = coarse_size; 8063 *local_primal_indices_n = local_primal_indices; 8064 PetscFunctionReturn(0); 8065 } 8066 8067 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8068 { 8069 IS localis_t; 8070 PetscInt i,lsize,*idxs,n; 8071 PetscScalar *vals; 8072 PetscErrorCode ierr; 8073 8074 PetscFunctionBegin; 8075 /* get indices in local ordering exploiting local to global map */ 8076 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8077 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8078 for (i=0;i<lsize;i++) vals[i] = 1.0; 8079 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8080 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8081 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8082 if (idxs) { /* multilevel guard */ 8083 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8084 } 8085 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8086 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8087 ierr = PetscFree(vals);CHKERRQ(ierr); 8088 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8089 /* now compute set in local ordering */ 8090 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8091 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8092 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8093 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8094 for (i=0,lsize=0;i<n;i++) { 8095 if (PetscRealPart(vals[i]) > 0.5) { 8096 lsize++; 8097 } 8098 } 8099 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8100 for (i=0,lsize=0;i<n;i++) { 8101 if (PetscRealPart(vals[i]) > 0.5) { 8102 idxs[lsize++] = i; 8103 } 8104 } 8105 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8106 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8107 *localis = localis_t; 8108 PetscFunctionReturn(0); 8109 } 8110 8111 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8112 { 8113 PC_IS *pcis=(PC_IS*)pc->data; 8114 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8115 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8116 Mat S_j; 8117 PetscInt *used_xadj,*used_adjncy; 8118 PetscBool free_used_adj; 8119 PetscErrorCode ierr; 8120 8121 PetscFunctionBegin; 8122 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8123 free_used_adj = PETSC_FALSE; 8124 if (pcbddc->sub_schurs_layers == -1) { 8125 used_xadj = NULL; 8126 used_adjncy = NULL; 8127 } else { 8128 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8129 used_xadj = pcbddc->mat_graph->xadj; 8130 used_adjncy = pcbddc->mat_graph->adjncy; 8131 } else if (pcbddc->computed_rowadj) { 8132 used_xadj = pcbddc->mat_graph->xadj; 8133 used_adjncy = pcbddc->mat_graph->adjncy; 8134 } else { 8135 PetscBool flg_row=PETSC_FALSE; 8136 const PetscInt *xadj,*adjncy; 8137 PetscInt nvtxs; 8138 8139 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8140 if (flg_row) { 8141 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8142 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8143 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8144 free_used_adj = PETSC_TRUE; 8145 } else { 8146 pcbddc->sub_schurs_layers = -1; 8147 used_xadj = NULL; 8148 used_adjncy = NULL; 8149 } 8150 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8151 } 8152 } 8153 8154 /* setup sub_schurs data */ 8155 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8156 if (!sub_schurs->schur_explicit) { 8157 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8158 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8159 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8160 } else { 8161 Mat change = NULL; 8162 Vec scaling = NULL; 8163 IS change_primal = NULL, iP; 8164 PetscInt benign_n; 8165 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8166 PetscBool isseqaij,need_change = PETSC_FALSE; 8167 PetscBool discrete_harmonic = PETSC_FALSE; 8168 8169 if (!pcbddc->use_vertices && reuse_solvers) { 8170 PetscInt n_vertices; 8171 8172 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8173 reuse_solvers = (PetscBool)!n_vertices; 8174 } 8175 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8176 if (!isseqaij) { 8177 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8178 if (matis->A == pcbddc->local_mat) { 8179 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8180 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8181 } else { 8182 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8183 } 8184 } 8185 if (!pcbddc->benign_change_explicit) { 8186 benign_n = pcbddc->benign_n; 8187 } else { 8188 benign_n = 0; 8189 } 8190 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8191 We need a global reduction to avoid possible deadlocks. 8192 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8193 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8194 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8195 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8196 need_change = (PetscBool)(!need_change); 8197 } 8198 /* If the user defines additional constraints, we import them here. 8199 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8200 if (need_change) { 8201 PC_IS *pcisf; 8202 PC_BDDC *pcbddcf; 8203 PC pcf; 8204 8205 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8206 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8207 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8208 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8209 8210 /* hacks */ 8211 pcisf = (PC_IS*)pcf->data; 8212 pcisf->is_B_local = pcis->is_B_local; 8213 pcisf->vec1_N = pcis->vec1_N; 8214 pcisf->BtoNmap = pcis->BtoNmap; 8215 pcisf->n = pcis->n; 8216 pcisf->n_B = pcis->n_B; 8217 pcbddcf = (PC_BDDC*)pcf->data; 8218 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8219 pcbddcf->mat_graph = pcbddc->mat_graph; 8220 pcbddcf->use_faces = PETSC_TRUE; 8221 pcbddcf->use_change_of_basis = PETSC_TRUE; 8222 pcbddcf->use_change_on_faces = PETSC_TRUE; 8223 pcbddcf->use_qr_single = PETSC_TRUE; 8224 pcbddcf->fake_change = PETSC_TRUE; 8225 8226 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 8227 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8228 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8229 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8230 change = pcbddcf->ConstraintMatrix; 8231 pcbddcf->ConstraintMatrix = NULL; 8232 8233 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8234 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8235 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8236 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8237 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8238 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8239 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8240 pcf->ops->destroy = NULL; 8241 pcf->ops->reset = NULL; 8242 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8243 } 8244 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8245 8246 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 8247 if (iP) { 8248 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 8249 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 8250 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8251 } 8252 if (discrete_harmonic) { 8253 Mat A; 8254 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 8255 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 8256 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 8257 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8258 ierr = MatDestroy(&A);CHKERRQ(ierr); 8259 } else { 8260 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8261 } 8262 ierr = MatDestroy(&change);CHKERRQ(ierr); 8263 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8264 } 8265 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8266 8267 /* free adjacency */ 8268 if (free_used_adj) { 8269 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8270 } 8271 PetscFunctionReturn(0); 8272 } 8273 8274 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8275 { 8276 PC_IS *pcis=(PC_IS*)pc->data; 8277 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8278 PCBDDCGraph graph; 8279 PetscErrorCode ierr; 8280 8281 PetscFunctionBegin; 8282 /* attach interface graph for determining subsets */ 8283 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8284 IS verticesIS,verticescomm; 8285 PetscInt vsize,*idxs; 8286 8287 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8288 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8289 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8290 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8291 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8292 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8293 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8294 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8295 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8296 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8297 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8298 } else { 8299 graph = pcbddc->mat_graph; 8300 } 8301 /* print some info */ 8302 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8303 IS vertices; 8304 PetscInt nv,nedges,nfaces; 8305 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8306 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8307 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8308 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8309 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8310 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8311 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8312 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8313 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8314 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8315 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8316 } 8317 8318 /* sub_schurs init */ 8319 if (!pcbddc->sub_schurs) { 8320 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8321 } 8322 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8323 pcbddc->sub_schurs->prefix = ((PetscObject)pc)->prefix; 8324 8325 /* free graph struct */ 8326 if (pcbddc->sub_schurs_rebuild) { 8327 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8328 } 8329 PetscFunctionReturn(0); 8330 } 8331 8332 PetscErrorCode PCBDDCCheckOperator(PC pc) 8333 { 8334 PC_IS *pcis=(PC_IS*)pc->data; 8335 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8336 PetscErrorCode ierr; 8337 8338 PetscFunctionBegin; 8339 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8340 IS zerodiag = NULL; 8341 Mat S_j,B0_B=NULL; 8342 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8343 PetscScalar *p0_check,*array,*array2; 8344 PetscReal norm; 8345 PetscInt i; 8346 8347 /* B0 and B0_B */ 8348 if (zerodiag) { 8349 IS dummy; 8350 8351 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8352 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8353 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8354 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8355 } 8356 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8357 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8358 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8359 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8360 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8361 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8362 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8363 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8364 /* S_j */ 8365 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8366 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8367 8368 /* mimic vector in \widetilde{W}_\Gamma */ 8369 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8370 /* continuous in primal space */ 8371 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8372 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8373 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8374 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8375 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8376 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8377 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8378 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8379 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8380 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8381 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8382 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8383 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8384 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8385 8386 /* assemble rhs for coarse problem */ 8387 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8388 /* local with Schur */ 8389 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8390 if (zerodiag) { 8391 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8392 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8393 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8394 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8395 } 8396 /* sum on primal nodes the local contributions */ 8397 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8398 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8399 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8400 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8401 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8402 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8403 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8404 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8405 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8406 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8407 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8408 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8409 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8410 /* scale primal nodes (BDDC sums contibutions) */ 8411 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8412 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8413 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8414 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8415 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8416 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8417 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8418 /* global: \widetilde{B0}_B w_\Gamma */ 8419 if (zerodiag) { 8420 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8421 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8422 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8423 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8424 } 8425 /* BDDC */ 8426 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8427 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8428 8429 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8430 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8431 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8432 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8433 for (i=0;i<pcbddc->benign_n;i++) { 8434 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8435 } 8436 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8437 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8438 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8439 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8440 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8441 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8442 } 8443 PetscFunctionReturn(0); 8444 } 8445 8446 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8447 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8448 { 8449 Mat At; 8450 IS rows; 8451 PetscInt rst,ren; 8452 PetscErrorCode ierr; 8453 PetscLayout rmap; 8454 8455 PetscFunctionBegin; 8456 rst = ren = 0; 8457 if (ccomm != MPI_COMM_NULL) { 8458 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8459 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8460 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8461 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8462 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8463 } 8464 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8465 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8466 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8467 8468 if (ccomm != MPI_COMM_NULL) { 8469 Mat_MPIAIJ *a,*b; 8470 IS from,to; 8471 Vec gvec; 8472 PetscInt lsize; 8473 8474 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8475 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8476 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8477 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8478 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8479 a = (Mat_MPIAIJ*)At->data; 8480 b = (Mat_MPIAIJ*)(*B)->data; 8481 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8482 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8483 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8484 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8485 b->A = a->A; 8486 b->B = a->B; 8487 8488 b->donotstash = a->donotstash; 8489 b->roworiented = a->roworiented; 8490 b->rowindices = 0; 8491 b->rowvalues = 0; 8492 b->getrowactive = PETSC_FALSE; 8493 8494 (*B)->rmap = rmap; 8495 (*B)->factortype = A->factortype; 8496 (*B)->assembled = PETSC_TRUE; 8497 (*B)->insertmode = NOT_SET_VALUES; 8498 (*B)->preallocated = PETSC_TRUE; 8499 8500 if (a->colmap) { 8501 #if defined(PETSC_USE_CTABLE) 8502 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8503 #else 8504 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8505 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8506 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8507 #endif 8508 } else b->colmap = 0; 8509 if (a->garray) { 8510 PetscInt len; 8511 len = a->B->cmap->n; 8512 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8513 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8514 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8515 } else b->garray = 0; 8516 8517 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8518 b->lvec = a->lvec; 8519 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8520 8521 /* cannot use VecScatterCopy */ 8522 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8523 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8524 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8525 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8526 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8527 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8528 ierr = ISDestroy(&from);CHKERRQ(ierr); 8529 ierr = ISDestroy(&to);CHKERRQ(ierr); 8530 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8531 } 8532 ierr = MatDestroy(&At);CHKERRQ(ierr); 8533 PetscFunctionReturn(0); 8534 } 8535