1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscdm.h> 5 #include <petscblaslapack.h> 6 #include <petsc/private/sfimpl.h> 7 8 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 9 10 /* if range is true, it returns B s.t. span{B} = range(A) 11 if range is false, it returns B s.t. range(B) _|_ range(A) */ 12 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 13 { 14 #if !defined(PETSC_USE_COMPLEX) 15 PetscScalar *uwork,*data,*U, ds = 0.; 16 PetscReal *sing; 17 PetscBLASInt bM,bN,lwork,lierr,di = 1; 18 PetscInt ulw,i,nr,nc,n; 19 PetscErrorCode ierr; 20 21 PetscFunctionBegin; 22 #if defined(PETSC_MISSING_LAPACK_GESVD) 23 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 24 #else 25 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 26 if (!nr || !nc) PetscFunctionReturn(0); 27 28 /* workspace */ 29 if (!work) { 30 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 31 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 32 } else { 33 ulw = lw; 34 uwork = work; 35 } 36 n = PetscMin(nr,nc); 37 if (!rwork) { 38 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 39 } else { 40 sing = rwork; 41 } 42 43 /* SVD */ 44 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 46 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 47 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 48 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 49 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 50 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 51 ierr = PetscFPTrapPop();CHKERRQ(ierr); 52 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 53 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 54 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 55 if (!rwork) { 56 ierr = PetscFree(sing);CHKERRQ(ierr); 57 } 58 if (!work) { 59 ierr = PetscFree(uwork);CHKERRQ(ierr); 60 } 61 /* create B */ 62 if (!range) { 63 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 64 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 65 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 66 } else { 67 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 68 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 69 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 70 } 71 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 72 ierr = PetscFree(U);CHKERRQ(ierr); 73 #endif 74 #else /* PETSC_USE_COMPLEX */ 75 PetscFunctionBegin; 76 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 77 #endif 78 PetscFunctionReturn(0); 79 } 80 81 /* TODO REMOVE */ 82 #if defined(PRINT_GDET) 83 static int inc = 0; 84 static int lev = 0; 85 #endif 86 87 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 88 { 89 PetscErrorCode ierr; 90 Mat GE,GEd; 91 PetscInt rsize,csize,esize; 92 PetscScalar *ptr; 93 94 PetscFunctionBegin; 95 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 96 if (!esize) PetscFunctionReturn(0); 97 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 98 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 99 100 /* gradients */ 101 ptr = work + 5*esize; 102 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 103 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 104 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 105 ierr = MatDestroy(&GE);CHKERRQ(ierr); 106 107 /* constants */ 108 ptr += rsize*csize; 109 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 110 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 111 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 112 ierr = MatDestroy(&GE);CHKERRQ(ierr); 113 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 114 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 115 116 if (corners) { 117 Mat GEc; 118 PetscScalar *vals,v; 119 120 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 121 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 122 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 123 /* v = PetscAbsScalar(vals[0]) */; 124 v = 1.; 125 cvals[0] = vals[0]/v; 126 cvals[1] = vals[1]/v; 127 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 128 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 129 #if defined(PRINT_GDET) 130 { 131 PetscViewer viewer; 132 char filename[256]; 133 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 134 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 135 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 136 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 137 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 138 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 139 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 140 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 141 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 142 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 143 } 144 #endif 145 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 146 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 147 } 148 149 PetscFunctionReturn(0); 150 } 151 152 PetscErrorCode PCBDDCNedelecSupport(PC pc) 153 { 154 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 155 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 156 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 157 Vec tvec; 158 PetscSF sfv; 159 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 160 MPI_Comm comm; 161 IS lned,primals,allprimals,nedfieldlocal; 162 IS *eedges,*extrows,*extcols,*alleedges; 163 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 164 PetscScalar *vals,*work; 165 PetscReal *rwork; 166 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 167 PetscInt ne,nv,Lv,order,n,field; 168 PetscInt n_neigh,*neigh,*n_shared,**shared; 169 PetscInt i,j,extmem,cum,maxsize,nee; 170 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 171 PetscInt *sfvleaves,*sfvroots; 172 PetscInt *corners,*cedges; 173 PetscInt *ecount,**eneighs,*vcount,**vneighs; 174 #if defined(PETSC_USE_DEBUG) 175 PetscInt *emarks; 176 #endif 177 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 178 PetscErrorCode ierr; 179 180 PetscFunctionBegin; 181 /* If the discrete gradient is defined for a subset of dofs and global is true, 182 it assumes G is given in global ordering for all the dofs. 183 Otherwise, the ordering is global for the Nedelec field */ 184 order = pcbddc->nedorder; 185 conforming = pcbddc->conforming; 186 field = pcbddc->nedfield; 187 global = pcbddc->nedglobal; 188 setprimal = PETSC_FALSE; 189 print = PETSC_FALSE; 190 singular = PETSC_FALSE; 191 192 /* Command line customization */ 193 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 194 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 195 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 196 ierr = PetscOptionsInt ("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 197 /* print debug info TODO: to be removed */ 198 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 199 ierr = PetscOptionsEnd();CHKERRQ(ierr); 200 201 /* Return if there are no edges in the decomposition and the problem is not singular */ 202 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 203 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 204 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 205 if (!singular) { 206 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 207 lrc[0] = PETSC_FALSE; 208 for (i=0;i<n;i++) { 209 if (PetscRealPart(vals[i]) > 2.) { 210 lrc[0] = PETSC_TRUE; 211 break; 212 } 213 } 214 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 215 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 216 if (!lrc[1]) PetscFunctionReturn(0); 217 } 218 219 /* Get Nedelec field */ 220 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 221 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 222 if (pcbddc->n_ISForDofsLocal && field >= 0) { 223 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 224 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 225 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 226 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 227 ne = n; 228 nedfieldlocal = NULL; 229 global = PETSC_TRUE; 230 } else if (field == PETSC_DECIDE) { 231 PetscInt rst,ren,*idx; 232 233 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 234 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 235 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 236 for (i=rst;i<ren;i++) { 237 PetscInt nc; 238 239 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 240 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 241 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 242 } 243 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 244 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 245 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 246 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 247 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 248 } else { 249 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 250 } 251 252 /* Sanity checks */ 253 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 254 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 255 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 256 257 /* Just set primal dofs and return */ 258 if (setprimal) { 259 IS enedfieldlocal; 260 PetscInt *eidxs; 261 262 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 263 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 264 if (nedfieldlocal) { 265 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 266 for (i=0,cum=0;i<ne;i++) { 267 if (PetscRealPart(vals[idxs[i]]) > 2.) { 268 eidxs[cum++] = idxs[i]; 269 } 270 } 271 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 272 } else { 273 for (i=0,cum=0;i<ne;i++) { 274 if (PetscRealPart(vals[i]) > 2.) { 275 eidxs[cum++] = i; 276 } 277 } 278 } 279 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 280 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 281 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 282 ierr = PetscFree(eidxs);CHKERRQ(ierr); 283 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 284 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 285 PetscFunctionReturn(0); 286 } 287 288 /* Compute some l2g maps */ 289 if (nedfieldlocal) { 290 IS is; 291 292 /* need to map from the local Nedelec field to local numbering */ 293 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 294 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 295 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 296 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 297 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 298 if (global) { 299 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 300 el2g = al2g; 301 } else { 302 IS gis; 303 304 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 305 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 306 ierr = ISDestroy(&gis);CHKERRQ(ierr); 307 } 308 ierr = ISDestroy(&is);CHKERRQ(ierr); 309 } else { 310 /* restore default */ 311 pcbddc->nedfield = -1; 312 /* one ref for the destruction of al2g, one for el2g */ 313 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 314 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 315 el2g = al2g; 316 fl2g = NULL; 317 } 318 319 /* Start communication to drop connections for interior edges (for cc analysis only) */ 320 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 321 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 322 if (nedfieldlocal) { 323 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 324 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 325 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 326 } else { 327 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 328 } 329 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 330 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 331 332 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 333 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 334 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 335 if (global) { 336 PetscInt rst; 337 338 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 339 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 340 if (matis->sf_rootdata[i] < 2) { 341 matis->sf_rootdata[cum++] = i + rst; 342 } 343 } 344 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 345 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 346 } else { 347 PetscInt *tbz; 348 349 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 350 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 351 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 352 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 353 for (i=0,cum=0;i<ne;i++) 354 if (matis->sf_leafdata[idxs[i]] == 1) 355 tbz[cum++] = i; 356 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 357 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 358 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 359 ierr = PetscFree(tbz);CHKERRQ(ierr); 360 } 361 } else { /* we need the entire G to infer the nullspace */ 362 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 363 G = pcbddc->discretegradient; 364 } 365 366 /* Extract subdomain relevant rows of G */ 367 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 368 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 369 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 370 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 371 ierr = ISDestroy(&lned);CHKERRQ(ierr); 372 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 373 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 374 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 375 376 /* SF for nodal dofs communications */ 377 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 378 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 379 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 380 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 381 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 382 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 383 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 384 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 385 i = singular ? 2 : 1; 386 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 387 388 /* Destroy temporary G created in MATIS format and modified G */ 389 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 390 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 391 ierr = MatDestroy(&G);CHKERRQ(ierr); 392 393 if (print) { 394 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 395 ierr = MatView(lG,NULL);CHKERRQ(ierr); 396 } 397 398 /* Save lG for values insertion in change of basis */ 399 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 400 401 /* Analyze the edge-nodes connections (duplicate lG) */ 402 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 403 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 404 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 405 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 406 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 407 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 408 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 409 /* need to import the boundary specification to ensure the 410 proper detection of coarse edges' endpoints */ 411 if (pcbddc->DirichletBoundariesLocal) { 412 IS is; 413 414 if (fl2g) { 415 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 416 } else { 417 is = pcbddc->DirichletBoundariesLocal; 418 } 419 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 420 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 421 for (i=0;i<cum;i++) { 422 if (idxs[i] >= 0) { 423 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 424 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 425 } 426 } 427 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 428 if (fl2g) { 429 ierr = ISDestroy(&is);CHKERRQ(ierr); 430 } 431 } 432 if (pcbddc->NeumannBoundariesLocal) { 433 IS is; 434 435 if (fl2g) { 436 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 437 } else { 438 is = pcbddc->NeumannBoundariesLocal; 439 } 440 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 441 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 442 for (i=0;i<cum;i++) { 443 if (idxs[i] >= 0) { 444 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 445 } 446 } 447 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 448 if (fl2g) { 449 ierr = ISDestroy(&is);CHKERRQ(ierr); 450 } 451 } 452 453 /* Count neighs per dof */ 454 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 455 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 456 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 457 for (i=1,cum=0;i<n_neigh;i++) { 458 cum += n_shared[i]; 459 for (j=0;j<n_shared[i];j++) { 460 ecount[shared[i][j]]++; 461 } 462 } 463 if (ne) { 464 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 465 } 466 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 467 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 468 for (i=1;i<n_neigh;i++) { 469 for (j=0;j<n_shared[i];j++) { 470 PetscInt k = shared[i][j]; 471 eneighs[k][ecount[k]] = neigh[i]; 472 ecount[k]++; 473 } 474 } 475 for (i=0;i<ne;i++) { 476 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 477 } 478 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 479 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 480 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 481 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 482 for (i=1,cum=0;i<n_neigh;i++) { 483 cum += n_shared[i]; 484 for (j=0;j<n_shared[i];j++) { 485 vcount[shared[i][j]]++; 486 } 487 } 488 if (nv) { 489 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 490 } 491 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 492 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 493 for (i=1;i<n_neigh;i++) { 494 for (j=0;j<n_shared[i];j++) { 495 PetscInt k = shared[i][j]; 496 vneighs[k][vcount[k]] = neigh[i]; 497 vcount[k]++; 498 } 499 } 500 for (i=0;i<nv;i++) { 501 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 502 } 503 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 504 505 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 506 for proper detection of coarse edges' endpoints */ 507 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 508 for (i=0;i<ne;i++) { 509 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 510 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 511 } 512 } 513 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 514 if (!conforming) { 515 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 516 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 517 } 518 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 519 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 520 cum = 0; 521 for (i=0;i<ne;i++) { 522 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 523 if (!PetscBTLookup(btee,i)) { 524 marks[cum++] = i; 525 continue; 526 } 527 /* set badly connected edge dofs as primal */ 528 if (!conforming) { 529 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 530 marks[cum++] = i; 531 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 532 for (j=ii[i];j<ii[i+1];j++) { 533 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 534 } 535 } else { 536 /* every edge dofs should be connected trough a certain number of nodal dofs 537 to other edge dofs belonging to coarse edges 538 - at most 2 endpoints 539 - order-1 interior nodal dofs 540 - no undefined nodal dofs (nconn < order) 541 */ 542 PetscInt ends = 0,ints = 0, undef = 0; 543 for (j=ii[i];j<ii[i+1];j++) { 544 PetscInt v = jj[j],k; 545 PetscInt nconn = iit[v+1]-iit[v]; 546 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 547 if (nconn > order) ends++; 548 else if (nconn == order) ints++; 549 else undef++; 550 } 551 if (undef || ends > 2 || ints != order -1) { 552 marks[cum++] = i; 553 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 554 for (j=ii[i];j<ii[i+1];j++) { 555 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 556 } 557 } 558 } 559 } 560 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 561 if (!order && ii[i+1] != ii[i]) { 562 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 563 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 564 } 565 } 566 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 567 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 568 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 569 if (!conforming) { 570 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 571 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 572 } 573 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 574 575 /* identify splitpoints and corner candidates */ 576 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 577 if (print) { 578 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 579 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 580 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 581 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 582 } 583 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 584 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 585 for (i=0;i<nv;i++) { 586 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 587 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 588 if (!order) { /* variable order */ 589 PetscReal vorder = 0.; 590 591 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 592 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 593 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 594 ord = 1; 595 } 596 #if defined(PETSC_USE_DEBUG) 597 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 598 #endif 599 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 600 if (PetscBTLookup(btbd,jj[j])) { 601 bdir = PETSC_TRUE; 602 break; 603 } 604 if (vc != ecount[jj[j]]) { 605 sneighs = PETSC_FALSE; 606 } else { 607 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 608 for (k=0;k<vc;k++) { 609 if (vn[k] != en[k]) { 610 sneighs = PETSC_FALSE; 611 break; 612 } 613 } 614 } 615 } 616 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 617 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 618 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 619 } else if (test == ord) { 620 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 621 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 622 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 623 } else { 624 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 625 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 626 } 627 } 628 } 629 ierr = PetscFree(ecount);CHKERRQ(ierr); 630 ierr = PetscFree(vcount);CHKERRQ(ierr); 631 if (ne) { 632 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 633 } 634 if (nv) { 635 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 636 } 637 ierr = PetscFree(eneighs);CHKERRQ(ierr); 638 ierr = PetscFree(vneighs);CHKERRQ(ierr); 639 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 640 641 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 642 if (order != 1) { 643 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 644 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 645 for (i=0;i<nv;i++) { 646 if (PetscBTLookup(btvcand,i)) { 647 PetscBool found = PETSC_FALSE; 648 for (j=ii[i];j<ii[i+1] && !found;j++) { 649 PetscInt k,e = jj[j]; 650 if (PetscBTLookup(bte,e)) continue; 651 for (k=iit[e];k<iit[e+1];k++) { 652 PetscInt v = jjt[k]; 653 if (v != i && PetscBTLookup(btvcand,v)) { 654 found = PETSC_TRUE; 655 break; 656 } 657 } 658 } 659 if (!found) { 660 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 661 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 662 } else { 663 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 664 } 665 } 666 } 667 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 668 } 669 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 670 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 671 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 672 673 /* Get the local G^T explicitly */ 674 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 675 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 676 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 677 678 /* Mark interior nodal dofs */ 679 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 680 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 681 for (i=1;i<n_neigh;i++) { 682 for (j=0;j<n_shared[i];j++) { 683 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 684 } 685 } 686 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 687 688 /* communicate corners and splitpoints */ 689 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 690 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 691 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 692 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 693 694 if (print) { 695 IS tbz; 696 697 cum = 0; 698 for (i=0;i<nv;i++) 699 if (sfvleaves[i]) 700 vmarks[cum++] = i; 701 702 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 703 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 704 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 705 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 706 } 707 708 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 709 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 710 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 711 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 712 713 /* Zero rows of lGt corresponding to identified corners 714 and interior nodal dofs */ 715 cum = 0; 716 for (i=0;i<nv;i++) { 717 if (sfvleaves[i]) { 718 vmarks[cum++] = i; 719 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 720 } 721 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 722 } 723 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 724 if (print) { 725 IS tbz; 726 727 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 728 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 729 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 730 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 731 } 732 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 733 ierr = PetscFree(vmarks);CHKERRQ(ierr); 734 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 735 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 736 737 /* Recompute G */ 738 ierr = MatDestroy(&lG);CHKERRQ(ierr); 739 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 740 if (print) { 741 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 742 ierr = MatView(lG,NULL);CHKERRQ(ierr); 743 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 744 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 745 } 746 747 /* Get primal dofs (if any) */ 748 cum = 0; 749 for (i=0;i<ne;i++) { 750 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 751 } 752 if (fl2g) { 753 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 754 } 755 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 756 if (print) { 757 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 758 ierr = ISView(primals,NULL);CHKERRQ(ierr); 759 } 760 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 761 /* TODO: what if the user passed in some of them ? */ 762 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 763 ierr = ISDestroy(&primals);CHKERRQ(ierr); 764 765 /* Compute edge connectivity */ 766 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 767 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 768 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 769 if (fl2g) { 770 PetscBT btf; 771 PetscInt *iia,*jja,*iiu,*jju; 772 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 773 774 /* create CSR for all local dofs */ 775 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 776 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 777 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 778 iiu = pcbddc->mat_graph->xadj; 779 jju = pcbddc->mat_graph->adjncy; 780 } else if (pcbddc->use_local_adj) { 781 rest = PETSC_TRUE; 782 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 783 } else { 784 free = PETSC_TRUE; 785 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 786 iiu[0] = 0; 787 for (i=0;i<n;i++) { 788 iiu[i+1] = i+1; 789 jju[i] = -1; 790 } 791 } 792 793 /* import sizes of CSR */ 794 iia[0] = 0; 795 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 796 797 /* overwrite entries corresponding to the Nedelec field */ 798 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 799 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 800 for (i=0;i<ne;i++) { 801 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 802 iia[idxs[i]+1] = ii[i+1]-ii[i]; 803 } 804 805 /* iia in CSR */ 806 for (i=0;i<n;i++) iia[i+1] += iia[i]; 807 808 /* jja in CSR */ 809 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 810 for (i=0;i<n;i++) 811 if (!PetscBTLookup(btf,i)) 812 for (j=0;j<iiu[i+1]-iiu[i];j++) 813 jja[iia[i]+j] = jju[iiu[i]+j]; 814 815 /* map edge dofs connectivity */ 816 if (jj) { 817 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 818 for (i=0;i<ne;i++) { 819 PetscInt e = idxs[i]; 820 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 821 } 822 } 823 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 824 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 825 if (rest) { 826 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 827 } 828 if (free) { 829 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 830 } 831 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 832 } else { 833 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 834 } 835 836 /* Analyze interface for edge dofs */ 837 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 838 pcbddc->mat_graph->twodim = PETSC_FALSE; 839 840 /* Get coarse edges in the edge space */ 841 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 842 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 843 844 if (fl2g) { 845 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 846 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 847 for (i=0;i<nee;i++) { 848 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 849 } 850 } else { 851 eedges = alleedges; 852 primals = allprimals; 853 } 854 855 /* Mark fine edge dofs with their coarse edge id */ 856 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 857 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 858 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 859 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 860 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 861 if (print) { 862 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 863 ierr = ISView(primals,NULL);CHKERRQ(ierr); 864 } 865 866 maxsize = 0; 867 for (i=0;i<nee;i++) { 868 PetscInt size,mark = i+1; 869 870 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 871 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 872 for (j=0;j<size;j++) marks[idxs[j]] = mark; 873 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 874 maxsize = PetscMax(maxsize,size); 875 } 876 877 /* Find coarse edge endpoints */ 878 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 879 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 880 for (i=0;i<nee;i++) { 881 PetscInt mark = i+1,size; 882 883 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 884 if (!size && nedfieldlocal) continue; 885 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 886 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 887 if (print) { 888 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 889 ISView(eedges[i],NULL); 890 } 891 for (j=0;j<size;j++) { 892 PetscInt k, ee = idxs[j]; 893 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 894 for (k=ii[ee];k<ii[ee+1];k++) { 895 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 896 if (PetscBTLookup(btv,jj[k])) { 897 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 898 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 899 PetscInt k2; 900 PetscBool corner = PETSC_FALSE; 901 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 902 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 903 /* it's a corner if either is connected with an edge dof belonging to a different cc or 904 if the edge dof lie on the natural part of the boundary */ 905 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 906 corner = PETSC_TRUE; 907 break; 908 } 909 } 910 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 911 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 912 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 913 } else { 914 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 915 } 916 } 917 } 918 } 919 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 920 } 921 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 922 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 923 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 924 925 /* Reset marked primal dofs */ 926 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 927 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 928 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 929 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 930 931 /* Now use the initial lG */ 932 ierr = MatDestroy(&lG);CHKERRQ(ierr); 933 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 934 lG = lGinit; 935 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 936 937 /* Compute extended cols indices */ 938 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 939 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 940 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 941 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 942 i *= maxsize; 943 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 944 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 945 eerr = PETSC_FALSE; 946 for (i=0;i<nee;i++) { 947 PetscInt size,found = 0; 948 949 cum = 0; 950 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 951 if (!size && nedfieldlocal) continue; 952 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 953 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 954 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 955 for (j=0;j<size;j++) { 956 PetscInt k,ee = idxs[j]; 957 for (k=ii[ee];k<ii[ee+1];k++) { 958 PetscInt vv = jj[k]; 959 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 960 else if (!PetscBTLookupSet(btvc,vv)) found++; 961 } 962 } 963 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 964 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 965 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 966 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 967 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 968 /* it may happen that endpoints are not defined at this point 969 if it is the case, mark this edge for a second pass */ 970 if (cum != size -1 || found != 2) { 971 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 972 if (print) { 973 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 974 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 975 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 976 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 977 } 978 eerr = PETSC_TRUE; 979 } 980 } 981 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 982 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 983 if (done) { 984 PetscInt *newprimals; 985 986 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 987 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 988 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 989 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 990 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 991 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 992 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 993 for (i=0;i<nee;i++) { 994 PetscBool has_candidates = PETSC_FALSE; 995 if (PetscBTLookup(bter,i)) { 996 PetscInt size,mark = i+1; 997 998 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 999 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1000 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1001 for (j=0;j<size;j++) { 1002 PetscInt k,ee = idxs[j]; 1003 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 1004 for (k=ii[ee];k<ii[ee+1];k++) { 1005 /* set all candidates located on the edge as corners */ 1006 if (PetscBTLookup(btvcand,jj[k])) { 1007 PetscInt k2,vv = jj[k]; 1008 has_candidates = PETSC_TRUE; 1009 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 1010 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 1011 /* set all edge dofs connected to candidate as primals */ 1012 for (k2=iit[vv];k2<iit[vv+1];k2++) { 1013 if (marks[jjt[k2]] == mark) { 1014 PetscInt k3,ee2 = jjt[k2]; 1015 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 1016 newprimals[cum++] = ee2; 1017 /* finally set the new corners */ 1018 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 1019 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1020 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1021 } 1022 } 1023 } 1024 } else { 1025 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1026 } 1027 } 1028 } 1029 if (!has_candidates) { /* circular edge */ 1030 PetscInt k, ee = idxs[0],*tmarks; 1031 1032 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1033 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1034 for (k=ii[ee];k<ii[ee+1];k++) { 1035 PetscInt k2; 1036 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1037 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1038 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1039 } 1040 for (j=0;j<size;j++) { 1041 if (tmarks[idxs[j]] > 1) { 1042 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1043 newprimals[cum++] = idxs[j]; 1044 } 1045 } 1046 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1047 } 1048 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1049 } 1050 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1051 } 1052 ierr = PetscFree(extcols);CHKERRQ(ierr); 1053 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1054 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1055 if (fl2g) { 1056 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1057 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1058 for (i=0;i<nee;i++) { 1059 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1060 } 1061 ierr = PetscFree(eedges);CHKERRQ(ierr); 1062 } 1063 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1064 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1065 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1066 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1067 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1068 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1069 pcbddc->mat_graph->twodim = PETSC_FALSE; 1070 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1071 if (fl2g) { 1072 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1073 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1074 for (i=0;i<nee;i++) { 1075 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1076 } 1077 } else { 1078 eedges = alleedges; 1079 primals = allprimals; 1080 } 1081 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1082 1083 /* Mark again */ 1084 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1085 for (i=0;i<nee;i++) { 1086 PetscInt size,mark = i+1; 1087 1088 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1089 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1090 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1091 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1092 } 1093 if (print) { 1094 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1095 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1096 } 1097 1098 /* Recompute extended cols */ 1099 eerr = PETSC_FALSE; 1100 for (i=0;i<nee;i++) { 1101 PetscInt size; 1102 1103 cum = 0; 1104 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1105 if (!size && nedfieldlocal) continue; 1106 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1107 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1108 for (j=0;j<size;j++) { 1109 PetscInt k,ee = idxs[j]; 1110 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1111 } 1112 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1113 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1114 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1115 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1116 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1117 if (cum != size -1) { 1118 if (print) { 1119 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1120 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1121 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1122 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1123 } 1124 eerr = PETSC_TRUE; 1125 } 1126 } 1127 } 1128 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1129 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1130 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1131 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1132 /* an error should not occur at this point */ 1133 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1134 1135 /* Check the number of endpoints */ 1136 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1137 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1138 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1139 for (i=0;i<nee;i++) { 1140 PetscInt size, found = 0, gc[2]; 1141 1142 /* init with defaults */ 1143 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1144 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1145 if (!size && nedfieldlocal) continue; 1146 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1147 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1148 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1149 for (j=0;j<size;j++) { 1150 PetscInt k,ee = idxs[j]; 1151 for (k=ii[ee];k<ii[ee+1];k++) { 1152 PetscInt vv = jj[k]; 1153 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1154 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1155 corners[i*2+found++] = vv; 1156 } 1157 } 1158 } 1159 if (found != 2) { 1160 PetscInt e; 1161 if (fl2g) { 1162 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1163 } else { 1164 e = idxs[0]; 1165 } 1166 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1167 } 1168 1169 /* get primal dof index on this coarse edge */ 1170 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1171 if (gc[0] > gc[1]) { 1172 PetscInt swap = corners[2*i]; 1173 corners[2*i] = corners[2*i+1]; 1174 corners[2*i+1] = swap; 1175 } 1176 cedges[i] = idxs[size-1]; 1177 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1178 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1179 } 1180 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1181 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1182 1183 #if defined(PETSC_USE_DEBUG) 1184 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1185 not interfere with neighbouring coarse edges */ 1186 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1187 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1188 for (i=0;i<nv;i++) { 1189 PetscInt emax = 0,eemax = 0; 1190 1191 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1192 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1193 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1194 for (j=1;j<nee+1;j++) { 1195 if (emax < emarks[j]) { 1196 emax = emarks[j]; 1197 eemax = j; 1198 } 1199 } 1200 /* not relevant for edges */ 1201 if (!eemax) continue; 1202 1203 for (j=ii[i];j<ii[i+1];j++) { 1204 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1205 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1206 } 1207 } 1208 } 1209 ierr = PetscFree(emarks);CHKERRQ(ierr); 1210 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1211 #endif 1212 1213 /* Compute extended rows indices for edge blocks of the change of basis */ 1214 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1215 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1216 extmem *= maxsize; 1217 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1218 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1219 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1220 for (i=0;i<nv;i++) { 1221 PetscInt mark = 0,size,start; 1222 1223 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1224 for (j=ii[i];j<ii[i+1];j++) 1225 if (marks[jj[j]] && !mark) 1226 mark = marks[jj[j]]; 1227 1228 /* not relevant */ 1229 if (!mark) continue; 1230 1231 /* import extended row */ 1232 mark--; 1233 start = mark*extmem+extrowcum[mark]; 1234 size = ii[i+1]-ii[i]; 1235 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1236 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1237 extrowcum[mark] += size; 1238 } 1239 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1240 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1241 ierr = PetscFree(marks);CHKERRQ(ierr); 1242 1243 /* Compress extrows */ 1244 cum = 0; 1245 for (i=0;i<nee;i++) { 1246 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1247 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1248 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1249 cum = PetscMax(cum,size); 1250 } 1251 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1252 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1253 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1254 1255 /* Workspace for lapack inner calls and VecSetValues */ 1256 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1257 1258 /* Create change of basis matrix (preallocation can be improved) */ 1259 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1260 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1261 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1262 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1263 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1264 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1265 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1266 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1267 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1268 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1269 1270 /* Defaults to identity */ 1271 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1272 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1273 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1274 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1275 1276 /* Create discrete gradient for the coarser level if needed */ 1277 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1278 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1279 if (pcbddc->current_level < pcbddc->max_levels) { 1280 ISLocalToGlobalMapping cel2g,cvl2g; 1281 IS wis,gwis; 1282 PetscInt cnv,cne; 1283 1284 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1285 if (fl2g) { 1286 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1287 } else { 1288 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1289 pcbddc->nedclocal = wis; 1290 } 1291 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1292 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1293 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1294 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1295 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1296 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1297 1298 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1299 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1300 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1301 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1302 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1303 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1304 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1305 1306 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1307 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1308 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1309 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1310 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1311 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1312 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1313 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1314 } 1315 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1316 1317 #if defined(PRINT_GDET) 1318 inc = 0; 1319 lev = pcbddc->current_level; 1320 #endif 1321 1322 /* Insert values in the change of basis matrix */ 1323 for (i=0;i<nee;i++) { 1324 Mat Gins = NULL, GKins = NULL; 1325 IS cornersis = NULL; 1326 PetscScalar cvals[2]; 1327 1328 if (pcbddc->nedcG) { 1329 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1330 } 1331 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1332 if (Gins && GKins) { 1333 PetscScalar *data; 1334 const PetscInt *rows,*cols; 1335 PetscInt nrh,nch,nrc,ncc; 1336 1337 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1338 /* H1 */ 1339 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1340 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1341 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1342 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1343 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1344 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1345 /* complement */ 1346 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1347 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1348 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i); 1349 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc); 1350 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1351 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1352 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1353 1354 /* coarse discrete gradient */ 1355 if (pcbddc->nedcG) { 1356 PetscInt cols[2]; 1357 1358 cols[0] = 2*i; 1359 cols[1] = 2*i+1; 1360 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1361 } 1362 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1363 } 1364 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1365 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1366 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1367 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1368 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1369 } 1370 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1371 1372 /* Start assembling */ 1373 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1374 if (pcbddc->nedcG) { 1375 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1376 } 1377 1378 /* Free */ 1379 if (fl2g) { 1380 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1381 for (i=0;i<nee;i++) { 1382 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1383 } 1384 ierr = PetscFree(eedges);CHKERRQ(ierr); 1385 } 1386 1387 /* hack mat_graph with primal dofs on the coarse edges */ 1388 { 1389 PCBDDCGraph graph = pcbddc->mat_graph; 1390 PetscInt *oqueue = graph->queue; 1391 PetscInt *ocptr = graph->cptr; 1392 PetscInt ncc,*idxs; 1393 1394 /* find first primal edge */ 1395 if (pcbddc->nedclocal) { 1396 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1397 } else { 1398 if (fl2g) { 1399 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1400 } 1401 idxs = cedges; 1402 } 1403 cum = 0; 1404 while (cum < nee && cedges[cum] < 0) cum++; 1405 1406 /* adapt connected components */ 1407 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1408 graph->cptr[0] = 0; 1409 for (i=0,ncc=0;i<graph->ncc;i++) { 1410 PetscInt lc = ocptr[i+1]-ocptr[i]; 1411 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1412 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1413 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1414 ncc++; 1415 lc--; 1416 cum++; 1417 while (cum < nee && cedges[cum] < 0) cum++; 1418 } 1419 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1420 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1421 ncc++; 1422 } 1423 graph->ncc = ncc; 1424 if (pcbddc->nedclocal) { 1425 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1426 } 1427 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1428 } 1429 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1430 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1431 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1432 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1433 1434 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1435 ierr = PetscFree(extrow);CHKERRQ(ierr); 1436 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1437 ierr = PetscFree(corners);CHKERRQ(ierr); 1438 ierr = PetscFree(cedges);CHKERRQ(ierr); 1439 ierr = PetscFree(extrows);CHKERRQ(ierr); 1440 ierr = PetscFree(extcols);CHKERRQ(ierr); 1441 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1442 1443 /* Complete assembling */ 1444 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1445 if (pcbddc->nedcG) { 1446 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1447 #if 0 1448 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1449 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1450 #endif 1451 } 1452 1453 /* set change of basis */ 1454 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1455 ierr = MatDestroy(&T);CHKERRQ(ierr); 1456 1457 PetscFunctionReturn(0); 1458 } 1459 1460 /* the near-null space of BDDC carries information on quadrature weights, 1461 and these can be collinear -> so cheat with MatNullSpaceCreate 1462 and create a suitable set of basis vectors first */ 1463 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1464 { 1465 PetscErrorCode ierr; 1466 PetscInt i; 1467 1468 PetscFunctionBegin; 1469 for (i=0;i<nvecs;i++) { 1470 PetscInt first,last; 1471 1472 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1473 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1474 if (i>=first && i < last) { 1475 PetscScalar *data; 1476 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1477 if (!has_const) { 1478 data[i-first] = 1.; 1479 } else { 1480 data[2*i-first] = 1./PetscSqrtReal(2.); 1481 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1482 } 1483 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1484 } 1485 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1486 } 1487 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1488 for (i=0;i<nvecs;i++) { /* reset vectors */ 1489 PetscInt first,last; 1490 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1491 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1492 if (i>=first && i < last) { 1493 PetscScalar *data; 1494 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1495 if (!has_const) { 1496 data[i-first] = 0.; 1497 } else { 1498 data[2*i-first] = 0.; 1499 data[2*i-first+1] = 0.; 1500 } 1501 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1502 } 1503 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1504 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1505 } 1506 PetscFunctionReturn(0); 1507 } 1508 1509 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1510 { 1511 Mat loc_divudotp; 1512 Vec p,v,vins,quad_vec,*quad_vecs; 1513 ISLocalToGlobalMapping map; 1514 IS *faces,*edges; 1515 PetscScalar *vals; 1516 const PetscScalar *array; 1517 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1518 PetscMPIInt rank; 1519 PetscErrorCode ierr; 1520 1521 PetscFunctionBegin; 1522 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1523 if (graph->twodim) { 1524 lmaxneighs = 2; 1525 } else { 1526 lmaxneighs = 1; 1527 for (i=0;i<ne;i++) { 1528 const PetscInt *idxs; 1529 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1530 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1531 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1532 } 1533 lmaxneighs++; /* graph count does not include self */ 1534 } 1535 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1536 maxsize = 0; 1537 for (i=0;i<ne;i++) { 1538 PetscInt nn; 1539 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1540 maxsize = PetscMax(maxsize,nn); 1541 } 1542 for (i=0;i<nf;i++) { 1543 PetscInt nn; 1544 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1545 maxsize = PetscMax(maxsize,nn); 1546 } 1547 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1548 /* create vectors to hold quadrature weights */ 1549 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1550 if (!transpose) { 1551 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1552 } else { 1553 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1554 } 1555 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1556 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1557 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1558 for (i=0;i<maxneighs;i++) { 1559 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1560 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1561 } 1562 1563 /* compute local quad vec */ 1564 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1565 if (!transpose) { 1566 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1567 } else { 1568 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1569 } 1570 ierr = VecSet(p,1.);CHKERRQ(ierr); 1571 if (!transpose) { 1572 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1573 } else { 1574 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1575 } 1576 if (vl2l) { 1577 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1578 } else { 1579 vins = v; 1580 } 1581 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1582 ierr = VecDestroy(&p);CHKERRQ(ierr); 1583 1584 /* insert in global quadrature vecs */ 1585 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1586 for (i=0;i<nf;i++) { 1587 const PetscInt *idxs; 1588 PetscInt idx,nn,j; 1589 1590 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1591 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1592 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1593 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1594 idx = -(idx+1); 1595 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1596 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1597 } 1598 for (i=0;i<ne;i++) { 1599 const PetscInt *idxs; 1600 PetscInt idx,nn,j; 1601 1602 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1603 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1604 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1605 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1606 idx = -(idx+1); 1607 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1608 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1609 } 1610 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1611 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1612 if (vl2l) { 1613 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1614 } 1615 ierr = VecDestroy(&v);CHKERRQ(ierr); 1616 ierr = PetscFree(vals);CHKERRQ(ierr); 1617 1618 /* assemble near null space */ 1619 for (i=0;i<maxneighs;i++) { 1620 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1621 } 1622 for (i=0;i<maxneighs;i++) { 1623 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1624 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1625 } 1626 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1627 PetscFunctionReturn(0); 1628 } 1629 1630 1631 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1632 { 1633 PetscErrorCode ierr; 1634 Vec local,global; 1635 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1636 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1637 1638 PetscFunctionBegin; 1639 /* need to convert from global to local topology information and remove references to information in global ordering */ 1640 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1641 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1642 if (pcbddc->user_provided_isfordofs) { 1643 if (pcbddc->n_ISForDofs) { 1644 PetscInt i; 1645 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1646 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1647 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1648 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1649 } 1650 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1651 pcbddc->n_ISForDofs = 0; 1652 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1653 } 1654 } else { 1655 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1656 DM dm; 1657 1658 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1659 if (!dm) { 1660 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1661 } 1662 if (dm) { 1663 IS *fields; 1664 PetscInt nf,i; 1665 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1666 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1667 for (i=0;i<nf;i++) { 1668 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1669 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1670 } 1671 ierr = PetscFree(fields);CHKERRQ(ierr); 1672 pcbddc->n_ISForDofsLocal = nf; 1673 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1674 PetscContainer c; 1675 1676 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1677 if (c) { 1678 MatISLocalFields lf; 1679 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1680 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1681 } else { /* fallback, create the default fields if bs > 1 */ 1682 PetscInt i, n = matis->A->rmap->n; 1683 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1684 if (i > 1) { 1685 pcbddc->n_ISForDofsLocal = i; 1686 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1687 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1688 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1689 } 1690 } 1691 } 1692 } 1693 } else { 1694 PetscInt i; 1695 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1696 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1697 } 1698 } 1699 } 1700 1701 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1702 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1703 } else if (pcbddc->DirichletBoundariesLocal) { 1704 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1705 } 1706 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1707 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1708 } else if (pcbddc->NeumannBoundariesLocal) { 1709 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1710 } 1711 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1712 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1713 } 1714 ierr = VecDestroy(&global);CHKERRQ(ierr); 1715 ierr = VecDestroy(&local);CHKERRQ(ierr); 1716 1717 PetscFunctionReturn(0); 1718 } 1719 1720 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1721 { 1722 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1723 PetscErrorCode ierr; 1724 IS nis; 1725 const PetscInt *idxs; 1726 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1727 PetscBool *ld; 1728 1729 PetscFunctionBegin; 1730 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1731 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 1732 if (mop == MPI_LAND) { 1733 /* init rootdata with true */ 1734 ld = (PetscBool*) matis->sf_rootdata; 1735 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1736 } else { 1737 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1738 } 1739 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1740 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1741 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1742 ld = (PetscBool*) matis->sf_leafdata; 1743 for (i=0;i<nd;i++) 1744 if (-1 < idxs[i] && idxs[i] < n) 1745 ld[idxs[i]] = PETSC_TRUE; 1746 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1747 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1748 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1749 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1750 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1751 if (mop == MPI_LAND) { 1752 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1753 } else { 1754 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1755 } 1756 for (i=0,nnd=0;i<n;i++) 1757 if (ld[i]) 1758 nidxs[nnd++] = i; 1759 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1760 ierr = ISDestroy(is);CHKERRQ(ierr); 1761 *is = nis; 1762 PetscFunctionReturn(0); 1763 } 1764 1765 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1766 { 1767 PC_IS *pcis = (PC_IS*)(pc->data); 1768 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1769 PetscErrorCode ierr; 1770 1771 PetscFunctionBegin; 1772 if (!pcbddc->benign_have_null) { 1773 PetscFunctionReturn(0); 1774 } 1775 if (pcbddc->ChangeOfBasisMatrix) { 1776 Vec swap; 1777 1778 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1779 swap = pcbddc->work_change; 1780 pcbddc->work_change = r; 1781 r = swap; 1782 } 1783 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1784 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1785 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1786 ierr = VecSet(z,0.);CHKERRQ(ierr); 1787 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1788 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1789 if (pcbddc->ChangeOfBasisMatrix) { 1790 pcbddc->work_change = r; 1791 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1792 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1793 } 1794 PetscFunctionReturn(0); 1795 } 1796 1797 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1798 { 1799 PCBDDCBenignMatMult_ctx ctx; 1800 PetscErrorCode ierr; 1801 PetscBool apply_right,apply_left,reset_x; 1802 1803 PetscFunctionBegin; 1804 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1805 if (transpose) { 1806 apply_right = ctx->apply_left; 1807 apply_left = ctx->apply_right; 1808 } else { 1809 apply_right = ctx->apply_right; 1810 apply_left = ctx->apply_left; 1811 } 1812 reset_x = PETSC_FALSE; 1813 if (apply_right) { 1814 const PetscScalar *ax; 1815 PetscInt nl,i; 1816 1817 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1818 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1819 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1820 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1821 for (i=0;i<ctx->benign_n;i++) { 1822 PetscScalar sum,val; 1823 const PetscInt *idxs; 1824 PetscInt nz,j; 1825 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1826 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1827 sum = 0.; 1828 if (ctx->apply_p0) { 1829 val = ctx->work[idxs[nz-1]]; 1830 for (j=0;j<nz-1;j++) { 1831 sum += ctx->work[idxs[j]]; 1832 ctx->work[idxs[j]] += val; 1833 } 1834 } else { 1835 for (j=0;j<nz-1;j++) { 1836 sum += ctx->work[idxs[j]]; 1837 } 1838 } 1839 ctx->work[idxs[nz-1]] -= sum; 1840 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1841 } 1842 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1843 reset_x = PETSC_TRUE; 1844 } 1845 if (transpose) { 1846 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1847 } else { 1848 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1849 } 1850 if (reset_x) { 1851 ierr = VecResetArray(x);CHKERRQ(ierr); 1852 } 1853 if (apply_left) { 1854 PetscScalar *ay; 1855 PetscInt i; 1856 1857 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1858 for (i=0;i<ctx->benign_n;i++) { 1859 PetscScalar sum,val; 1860 const PetscInt *idxs; 1861 PetscInt nz,j; 1862 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1863 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1864 val = -ay[idxs[nz-1]]; 1865 if (ctx->apply_p0) { 1866 sum = 0.; 1867 for (j=0;j<nz-1;j++) { 1868 sum += ay[idxs[j]]; 1869 ay[idxs[j]] += val; 1870 } 1871 ay[idxs[nz-1]] += sum; 1872 } else { 1873 for (j=0;j<nz-1;j++) { 1874 ay[idxs[j]] += val; 1875 } 1876 ay[idxs[nz-1]] = 0.; 1877 } 1878 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1879 } 1880 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1881 } 1882 PetscFunctionReturn(0); 1883 } 1884 1885 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1886 { 1887 PetscErrorCode ierr; 1888 1889 PetscFunctionBegin; 1890 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1891 PetscFunctionReturn(0); 1892 } 1893 1894 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1895 { 1896 PetscErrorCode ierr; 1897 1898 PetscFunctionBegin; 1899 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1900 PetscFunctionReturn(0); 1901 } 1902 1903 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1904 { 1905 PC_IS *pcis = (PC_IS*)pc->data; 1906 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1907 PCBDDCBenignMatMult_ctx ctx; 1908 PetscErrorCode ierr; 1909 1910 PetscFunctionBegin; 1911 if (!restore) { 1912 Mat A_IB,A_BI; 1913 PetscScalar *work; 1914 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1915 1916 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1917 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1918 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1919 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1920 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1921 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1922 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1923 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1924 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1925 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1926 ctx->apply_left = PETSC_TRUE; 1927 ctx->apply_right = PETSC_FALSE; 1928 ctx->apply_p0 = PETSC_FALSE; 1929 ctx->benign_n = pcbddc->benign_n; 1930 if (reuse) { 1931 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1932 ctx->free = PETSC_FALSE; 1933 } else { /* TODO: could be optimized for successive solves */ 1934 ISLocalToGlobalMapping N_to_D; 1935 PetscInt i; 1936 1937 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1938 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1939 for (i=0;i<pcbddc->benign_n;i++) { 1940 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1941 } 1942 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1943 ctx->free = PETSC_TRUE; 1944 } 1945 ctx->A = pcis->A_IB; 1946 ctx->work = work; 1947 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1948 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1949 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1950 pcis->A_IB = A_IB; 1951 1952 /* A_BI as A_IB^T */ 1953 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1954 pcbddc->benign_original_mat = pcis->A_BI; 1955 pcis->A_BI = A_BI; 1956 } else { 1957 if (!pcbddc->benign_original_mat) { 1958 PetscFunctionReturn(0); 1959 } 1960 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1961 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1962 pcis->A_IB = ctx->A; 1963 ctx->A = NULL; 1964 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1965 pcis->A_BI = pcbddc->benign_original_mat; 1966 pcbddc->benign_original_mat = NULL; 1967 if (ctx->free) { 1968 PetscInt i; 1969 for (i=0;i<ctx->benign_n;i++) { 1970 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1971 } 1972 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1973 } 1974 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1975 ierr = PetscFree(ctx);CHKERRQ(ierr); 1976 } 1977 PetscFunctionReturn(0); 1978 } 1979 1980 /* used just in bddc debug mode */ 1981 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1982 { 1983 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1984 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1985 Mat An; 1986 PetscErrorCode ierr; 1987 1988 PetscFunctionBegin; 1989 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1990 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1991 if (is1) { 1992 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1993 ierr = MatDestroy(&An);CHKERRQ(ierr); 1994 } else { 1995 *B = An; 1996 } 1997 PetscFunctionReturn(0); 1998 } 1999 2000 /* TODO: add reuse flag */ 2001 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2002 { 2003 Mat Bt; 2004 PetscScalar *a,*bdata; 2005 const PetscInt *ii,*ij; 2006 PetscInt m,n,i,nnz,*bii,*bij; 2007 PetscBool flg_row; 2008 PetscErrorCode ierr; 2009 2010 PetscFunctionBegin; 2011 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2012 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2013 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2014 nnz = n; 2015 for (i=0;i<ii[n];i++) { 2016 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2017 } 2018 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2019 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2020 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2021 nnz = 0; 2022 bii[0] = 0; 2023 for (i=0;i<n;i++) { 2024 PetscInt j; 2025 for (j=ii[i];j<ii[i+1];j++) { 2026 PetscScalar entry = a[j]; 2027 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 2028 bij[nnz] = ij[j]; 2029 bdata[nnz] = entry; 2030 nnz++; 2031 } 2032 } 2033 bii[i+1] = nnz; 2034 } 2035 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2036 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2037 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2038 { 2039 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2040 b->free_a = PETSC_TRUE; 2041 b->free_ij = PETSC_TRUE; 2042 } 2043 *B = Bt; 2044 PetscFunctionReturn(0); 2045 } 2046 2047 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 2048 { 2049 Mat B; 2050 IS is_dummy,*cc_n; 2051 ISLocalToGlobalMapping l2gmap_dummy; 2052 PCBDDCGraph graph; 2053 PetscInt i,n; 2054 PetscInt *xadj,*adjncy; 2055 PetscInt *xadj_filtered,*adjncy_filtered; 2056 PetscBool flg_row,isseqaij; 2057 PetscErrorCode ierr; 2058 2059 PetscFunctionBegin; 2060 if (!A->rmap->N || !A->cmap->N) { 2061 *ncc = 0; 2062 *cc = NULL; 2063 PetscFunctionReturn(0); 2064 } 2065 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2066 if (!isseqaij && filter) { 2067 PetscBool isseqdense; 2068 2069 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2070 if (!isseqdense) { 2071 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2072 } else { /* TODO: rectangular case and LDA */ 2073 PetscScalar *array; 2074 PetscReal chop=1.e-6; 2075 2076 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2077 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2078 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2079 for (i=0;i<n;i++) { 2080 PetscInt j; 2081 for (j=i+1;j<n;j++) { 2082 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2083 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2084 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2085 } 2086 } 2087 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2088 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2089 } 2090 } else { 2091 B = A; 2092 } 2093 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2094 2095 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2096 if (filter) { 2097 PetscScalar *data; 2098 PetscInt j,cum; 2099 2100 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2101 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2102 cum = 0; 2103 for (i=0;i<n;i++) { 2104 PetscInt t; 2105 2106 for (j=xadj[i];j<xadj[i+1];j++) { 2107 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2108 continue; 2109 } 2110 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2111 } 2112 t = xadj_filtered[i]; 2113 xadj_filtered[i] = cum; 2114 cum += t; 2115 } 2116 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2117 } else { 2118 xadj_filtered = NULL; 2119 adjncy_filtered = NULL; 2120 } 2121 2122 /* compute local connected components using PCBDDCGraph */ 2123 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2124 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2125 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2126 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2127 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2128 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2129 if (xadj_filtered) { 2130 graph->xadj = xadj_filtered; 2131 graph->adjncy = adjncy_filtered; 2132 } else { 2133 graph->xadj = xadj; 2134 graph->adjncy = adjncy; 2135 } 2136 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2137 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2138 /* partial clean up */ 2139 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2140 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2141 if (A != B) { 2142 ierr = MatDestroy(&B);CHKERRQ(ierr); 2143 } 2144 2145 /* get back data */ 2146 if (ncc) *ncc = graph->ncc; 2147 if (cc) { 2148 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2149 for (i=0;i<graph->ncc;i++) { 2150 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2151 } 2152 *cc = cc_n; 2153 } 2154 /* clean up graph */ 2155 graph->xadj = 0; 2156 graph->adjncy = 0; 2157 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2158 PetscFunctionReturn(0); 2159 } 2160 2161 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2162 { 2163 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2164 PC_IS* pcis = (PC_IS*)(pc->data); 2165 IS dirIS = NULL; 2166 PetscInt i; 2167 PetscErrorCode ierr; 2168 2169 PetscFunctionBegin; 2170 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2171 if (zerodiag) { 2172 Mat A; 2173 Vec vec3_N; 2174 PetscScalar *vals; 2175 const PetscInt *idxs; 2176 PetscInt nz,*count; 2177 2178 /* p0 */ 2179 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2180 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2181 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2182 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2183 for (i=0;i<nz;i++) vals[i] = 1.; 2184 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2185 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2186 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2187 /* v_I */ 2188 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2189 for (i=0;i<nz;i++) vals[i] = 0.; 2190 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2191 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2192 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2193 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2194 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2195 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2196 if (dirIS) { 2197 PetscInt n; 2198 2199 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2200 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2201 for (i=0;i<n;i++) vals[i] = 0.; 2202 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2203 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2204 } 2205 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2206 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2207 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2208 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2209 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2210 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2211 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2212 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2213 ierr = PetscFree(vals);CHKERRQ(ierr); 2214 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2215 2216 /* there should not be any pressure dofs lying on the interface */ 2217 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2218 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2219 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2220 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2221 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2222 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2223 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2224 ierr = PetscFree(count);CHKERRQ(ierr); 2225 } 2226 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2227 2228 /* check PCBDDCBenignGetOrSetP0 */ 2229 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2230 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2231 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2232 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2233 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2234 for (i=0;i<pcbddc->benign_n;i++) { 2235 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2236 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 2237 } 2238 PetscFunctionReturn(0); 2239 } 2240 2241 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2242 { 2243 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2244 IS pressures,zerodiag,zerodiag_save,*zerodiag_subs; 2245 PetscInt nz,n; 2246 PetscInt *interior_dofs,n_interior_dofs,nneu; 2247 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2248 PetscErrorCode ierr; 2249 2250 PetscFunctionBegin; 2251 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2252 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2253 for (n=0;n<pcbddc->benign_n;n++) { 2254 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2255 } 2256 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2257 pcbddc->benign_n = 0; 2258 2259 /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line) 2260 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2261 Checks if all the pressure dofs in each subdomain have a zero diagonal 2262 If not, a change of basis on pressures is not needed 2263 since the local Schur complements are already SPD 2264 */ 2265 has_null_pressures = PETSC_TRUE; 2266 have_null = PETSC_TRUE; 2267 if (pcbddc->n_ISForDofsLocal) { 2268 IS iP = NULL; 2269 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2270 2271 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2272 ierr = PetscOptionsInt ("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);CHKERRQ(ierr); 2273 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2274 if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p); 2275 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2276 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2277 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2278 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2279 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2280 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2281 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2282 if (iP) { 2283 IS newpressures; 2284 2285 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2286 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2287 pressures = newpressures; 2288 } 2289 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2290 if (!sorted) { 2291 ierr = ISSort(pressures);CHKERRQ(ierr); 2292 } 2293 } else { 2294 pressures = NULL; 2295 } 2296 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2297 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2298 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2299 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2300 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2301 if (!sorted) { 2302 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2303 } 2304 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2305 zerodiag_save = zerodiag; 2306 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2307 if (!nz) { 2308 if (n) have_null = PETSC_FALSE; 2309 has_null_pressures = PETSC_FALSE; 2310 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2311 } 2312 recompute_zerodiag = PETSC_FALSE; 2313 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2314 zerodiag_subs = NULL; 2315 pcbddc->benign_n = 0; 2316 n_interior_dofs = 0; 2317 interior_dofs = NULL; 2318 nneu = 0; 2319 if (pcbddc->NeumannBoundariesLocal) { 2320 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2321 } 2322 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2323 if (checkb) { /* need to compute interior nodes */ 2324 PetscInt n,i,j; 2325 PetscInt n_neigh,*neigh,*n_shared,**shared; 2326 PetscInt *iwork; 2327 2328 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2329 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2330 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2331 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2332 for (i=1;i<n_neigh;i++) 2333 for (j=0;j<n_shared[i];j++) 2334 iwork[shared[i][j]] += 1; 2335 for (i=0;i<n;i++) 2336 if (!iwork[i]) 2337 interior_dofs[n_interior_dofs++] = i; 2338 ierr = PetscFree(iwork);CHKERRQ(ierr); 2339 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2340 } 2341 if (has_null_pressures) { 2342 IS *subs; 2343 PetscInt nsubs,i,j,nl; 2344 const PetscInt *idxs; 2345 PetscScalar *array; 2346 Vec *work; 2347 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2348 2349 subs = pcbddc->local_subs; 2350 nsubs = pcbddc->n_local_subs; 2351 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2352 if (checkb) { 2353 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2354 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2355 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2356 /* work[0] = 1_p */ 2357 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2358 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2359 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2360 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2361 /* work[0] = 1_v */ 2362 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2363 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2364 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2365 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2366 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2367 } 2368 if (nsubs > 1) { 2369 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2370 for (i=0;i<nsubs;i++) { 2371 ISLocalToGlobalMapping l2g; 2372 IS t_zerodiag_subs; 2373 PetscInt nl; 2374 2375 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2376 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2377 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2378 if (nl) { 2379 PetscBool valid = PETSC_TRUE; 2380 2381 if (checkb) { 2382 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2383 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2384 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2385 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2386 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2387 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2388 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2389 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2390 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2391 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2392 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2393 for (j=0;j<n_interior_dofs;j++) { 2394 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2395 valid = PETSC_FALSE; 2396 break; 2397 } 2398 } 2399 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2400 } 2401 if (valid && nneu) { 2402 const PetscInt *idxs; 2403 PetscInt nzb; 2404 2405 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2406 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2407 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2408 if (nzb) valid = PETSC_FALSE; 2409 } 2410 if (valid && pressures) { 2411 IS t_pressure_subs; 2412 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2413 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2414 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2415 } 2416 if (valid) { 2417 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2418 pcbddc->benign_n++; 2419 } else { 2420 recompute_zerodiag = PETSC_TRUE; 2421 } 2422 } 2423 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2424 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2425 } 2426 } else { /* there's just one subdomain (or zero if they have not been detected */ 2427 PetscBool valid = PETSC_TRUE; 2428 2429 if (nneu) valid = PETSC_FALSE; 2430 if (valid && pressures) { 2431 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2432 } 2433 if (valid && checkb) { 2434 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2435 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2436 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2437 for (j=0;j<n_interior_dofs;j++) { 2438 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2439 valid = PETSC_FALSE; 2440 break; 2441 } 2442 } 2443 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2444 } 2445 if (valid) { 2446 pcbddc->benign_n = 1; 2447 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2448 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2449 zerodiag_subs[0] = zerodiag; 2450 } 2451 } 2452 if (checkb) { 2453 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2454 } 2455 } 2456 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2457 2458 if (!pcbddc->benign_n) { 2459 PetscInt n; 2460 2461 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2462 recompute_zerodiag = PETSC_FALSE; 2463 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2464 if (n) { 2465 has_null_pressures = PETSC_FALSE; 2466 have_null = PETSC_FALSE; 2467 } 2468 } 2469 2470 /* final check for null pressures */ 2471 if (zerodiag && pressures) { 2472 PetscInt nz,np; 2473 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2474 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2475 if (nz != np) have_null = PETSC_FALSE; 2476 } 2477 2478 if (recompute_zerodiag) { 2479 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2480 if (pcbddc->benign_n == 1) { 2481 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2482 zerodiag = zerodiag_subs[0]; 2483 } else { 2484 PetscInt i,nzn,*new_idxs; 2485 2486 nzn = 0; 2487 for (i=0;i<pcbddc->benign_n;i++) { 2488 PetscInt ns; 2489 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2490 nzn += ns; 2491 } 2492 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2493 nzn = 0; 2494 for (i=0;i<pcbddc->benign_n;i++) { 2495 PetscInt ns,*idxs; 2496 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2497 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2498 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2499 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2500 nzn += ns; 2501 } 2502 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2503 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2504 } 2505 have_null = PETSC_FALSE; 2506 } 2507 2508 /* Prepare matrix to compute no-net-flux */ 2509 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2510 Mat A,loc_divudotp; 2511 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2512 IS row,col,isused = NULL; 2513 PetscInt M,N,n,st,n_isused; 2514 2515 if (pressures) { 2516 isused = pressures; 2517 } else { 2518 isused = zerodiag_save; 2519 } 2520 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2521 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2522 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2523 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2524 n_isused = 0; 2525 if (isused) { 2526 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2527 } 2528 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2529 st = st-n_isused; 2530 if (n) { 2531 const PetscInt *gidxs; 2532 2533 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2534 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2535 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2536 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2537 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2538 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2539 } else { 2540 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2541 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2542 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2543 } 2544 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2545 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2546 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2547 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2548 ierr = ISDestroy(&row);CHKERRQ(ierr); 2549 ierr = ISDestroy(&col);CHKERRQ(ierr); 2550 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2551 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2552 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2553 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2554 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2555 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2556 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2557 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2558 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2559 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2560 } 2561 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2562 2563 /* change of basis and p0 dofs */ 2564 if (has_null_pressures) { 2565 IS zerodiagc; 2566 const PetscInt *idxs,*idxsc; 2567 PetscInt i,s,*nnz; 2568 2569 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2570 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2571 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2572 /* local change of basis for pressures */ 2573 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2574 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2575 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2576 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2577 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2578 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2579 for (i=0;i<pcbddc->benign_n;i++) { 2580 PetscInt nzs,j; 2581 2582 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2583 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2584 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2585 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2586 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2587 } 2588 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2589 ierr = PetscFree(nnz);CHKERRQ(ierr); 2590 /* set identity on velocities */ 2591 for (i=0;i<n-nz;i++) { 2592 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2593 } 2594 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2595 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2596 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2597 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2598 /* set change on pressures */ 2599 for (s=0;s<pcbddc->benign_n;s++) { 2600 PetscScalar *array; 2601 PetscInt nzs; 2602 2603 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2604 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2605 for (i=0;i<nzs-1;i++) { 2606 PetscScalar vals[2]; 2607 PetscInt cols[2]; 2608 2609 cols[0] = idxs[i]; 2610 cols[1] = idxs[nzs-1]; 2611 vals[0] = 1.; 2612 vals[1] = 1.; 2613 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2614 } 2615 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2616 for (i=0;i<nzs-1;i++) array[i] = -1.; 2617 array[nzs-1] = 1.; 2618 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2619 /* store local idxs for p0 */ 2620 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2621 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2622 ierr = PetscFree(array);CHKERRQ(ierr); 2623 } 2624 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2625 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2626 /* project if needed */ 2627 if (pcbddc->benign_change_explicit) { 2628 Mat M; 2629 2630 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2631 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2632 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2633 ierr = MatDestroy(&M);CHKERRQ(ierr); 2634 } 2635 /* store global idxs for p0 */ 2636 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2637 } 2638 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2639 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2640 2641 /* determines if the coarse solver will be singular or not */ 2642 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2643 /* determines if the problem has subdomains with 0 pressure block */ 2644 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2645 *zerodiaglocal = zerodiag; 2646 PetscFunctionReturn(0); 2647 } 2648 2649 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2650 { 2651 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2652 PetscScalar *array; 2653 PetscErrorCode ierr; 2654 2655 PetscFunctionBegin; 2656 if (!pcbddc->benign_sf) { 2657 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2658 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2659 } 2660 if (get) { 2661 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2662 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2663 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2664 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2665 } else { 2666 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2667 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2668 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2669 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2670 } 2671 PetscFunctionReturn(0); 2672 } 2673 2674 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2675 { 2676 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2677 PetscErrorCode ierr; 2678 2679 PetscFunctionBegin; 2680 /* TODO: add error checking 2681 - avoid nested pop (or push) calls. 2682 - cannot push before pop. 2683 - cannot call this if pcbddc->local_mat is NULL 2684 */ 2685 if (!pcbddc->benign_n) { 2686 PetscFunctionReturn(0); 2687 } 2688 if (pop) { 2689 if (pcbddc->benign_change_explicit) { 2690 IS is_p0; 2691 MatReuse reuse; 2692 2693 /* extract B_0 */ 2694 reuse = MAT_INITIAL_MATRIX; 2695 if (pcbddc->benign_B0) { 2696 reuse = MAT_REUSE_MATRIX; 2697 } 2698 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2699 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2700 /* remove rows and cols from local problem */ 2701 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2702 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2703 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2704 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2705 } else { 2706 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2707 PetscScalar *vals; 2708 PetscInt i,n,*idxs_ins; 2709 2710 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2711 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2712 if (!pcbddc->benign_B0) { 2713 PetscInt *nnz; 2714 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2715 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2716 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2717 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2718 for (i=0;i<pcbddc->benign_n;i++) { 2719 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2720 nnz[i] = n - nnz[i]; 2721 } 2722 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2723 ierr = PetscFree(nnz);CHKERRQ(ierr); 2724 } 2725 2726 for (i=0;i<pcbddc->benign_n;i++) { 2727 PetscScalar *array; 2728 PetscInt *idxs,j,nz,cum; 2729 2730 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2731 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2732 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2733 for (j=0;j<nz;j++) vals[j] = 1.; 2734 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2735 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2736 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2737 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2738 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2739 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2740 cum = 0; 2741 for (j=0;j<n;j++) { 2742 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2743 vals[cum] = array[j]; 2744 idxs_ins[cum] = j; 2745 cum++; 2746 } 2747 } 2748 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2749 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2750 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2751 } 2752 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2753 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2754 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2755 } 2756 } else { /* push */ 2757 if (pcbddc->benign_change_explicit) { 2758 PetscInt i; 2759 2760 for (i=0;i<pcbddc->benign_n;i++) { 2761 PetscScalar *B0_vals; 2762 PetscInt *B0_cols,B0_ncol; 2763 2764 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2765 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2766 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2767 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2768 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2769 } 2770 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2771 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2772 } else { 2773 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2774 } 2775 } 2776 PetscFunctionReturn(0); 2777 } 2778 2779 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2780 { 2781 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2782 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2783 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2784 PetscBLASInt *B_iwork,*B_ifail; 2785 PetscScalar *work,lwork; 2786 PetscScalar *St,*S,*eigv; 2787 PetscScalar *Sarray,*Starray; 2788 PetscReal *eigs,thresh; 2789 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2790 PetscBool allocated_S_St; 2791 #if defined(PETSC_USE_COMPLEX) 2792 PetscReal *rwork; 2793 #endif 2794 PetscErrorCode ierr; 2795 2796 PetscFunctionBegin; 2797 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2798 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2799 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\nRerun with -sub_schurs_hermitian 1 -sub_schurs_posdef 1 if the problem is SPD",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2800 2801 if (pcbddc->dbg_flag) { 2802 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2803 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2804 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2805 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2806 } 2807 2808 if (pcbddc->dbg_flag) { 2809 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2810 } 2811 2812 /* max size of subsets */ 2813 mss = 0; 2814 for (i=0;i<sub_schurs->n_subs;i++) { 2815 PetscInt subset_size; 2816 2817 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2818 mss = PetscMax(mss,subset_size); 2819 } 2820 2821 /* min/max and threshold */ 2822 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2823 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2824 nmax = PetscMax(nmin,nmax); 2825 allocated_S_St = PETSC_FALSE; 2826 if (nmin) { 2827 allocated_S_St = PETSC_TRUE; 2828 } 2829 2830 /* allocate lapack workspace */ 2831 cum = cum2 = 0; 2832 maxneigs = 0; 2833 for (i=0;i<sub_schurs->n_subs;i++) { 2834 PetscInt n,subset_size; 2835 2836 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2837 n = PetscMin(subset_size,nmax); 2838 cum += subset_size; 2839 cum2 += subset_size*n; 2840 maxneigs = PetscMax(maxneigs,n); 2841 } 2842 if (mss) { 2843 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2844 PetscBLASInt B_itype = 1; 2845 PetscBLASInt B_N = mss; 2846 PetscReal zero = 0.0; 2847 PetscReal eps = 0.0; /* dlamch? */ 2848 2849 B_lwork = -1; 2850 S = NULL; 2851 St = NULL; 2852 eigs = NULL; 2853 eigv = NULL; 2854 B_iwork = NULL; 2855 B_ifail = NULL; 2856 #if defined(PETSC_USE_COMPLEX) 2857 rwork = NULL; 2858 #endif 2859 thresh = 1.0; 2860 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2861 #if defined(PETSC_USE_COMPLEX) 2862 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2863 #else 2864 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2865 #endif 2866 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2867 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2868 } else { 2869 /* TODO */ 2870 } 2871 } else { 2872 lwork = 0; 2873 } 2874 2875 nv = 0; 2876 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2877 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2878 } 2879 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2880 if (allocated_S_St) { 2881 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2882 } 2883 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2884 #if defined(PETSC_USE_COMPLEX) 2885 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2886 #endif 2887 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2888 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2889 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2890 nv+cum,&pcbddc->adaptive_constraints_idxs, 2891 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2892 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2893 2894 maxneigs = 0; 2895 cum = cumarray = 0; 2896 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2897 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2898 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2899 const PetscInt *idxs; 2900 2901 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2902 for (cum=0;cum<nv;cum++) { 2903 pcbddc->adaptive_constraints_n[cum] = 1; 2904 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2905 pcbddc->adaptive_constraints_data[cum] = 1.0; 2906 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2907 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2908 } 2909 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2910 } 2911 2912 if (mss) { /* multilevel */ 2913 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2914 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2915 } 2916 2917 thresh = pcbddc->adaptive_threshold; 2918 for (i=0;i<sub_schurs->n_subs;i++) { 2919 const PetscInt *idxs; 2920 PetscReal upper,lower; 2921 PetscInt j,subset_size,eigs_start = 0; 2922 PetscBLASInt B_N; 2923 PetscBool same_data = PETSC_FALSE; 2924 2925 if (pcbddc->use_deluxe_scaling) { 2926 upper = PETSC_MAX_REAL; 2927 lower = thresh; 2928 } else { 2929 upper = 1./thresh; 2930 lower = 0.; 2931 } 2932 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2933 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2934 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2935 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2936 if (sub_schurs->is_hermitian) { 2937 PetscInt j,k; 2938 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2939 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2940 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2941 } 2942 for (j=0;j<subset_size;j++) { 2943 for (k=j;k<subset_size;k++) { 2944 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2945 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2946 } 2947 } 2948 } else { 2949 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2950 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2951 } 2952 } else { 2953 S = Sarray + cumarray; 2954 St = Starray + cumarray; 2955 } 2956 /* see if we can save some work */ 2957 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2958 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2959 } 2960 2961 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2962 B_neigs = 0; 2963 } else { 2964 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2965 PetscBLASInt B_itype = 1; 2966 PetscBLASInt B_IL, B_IU; 2967 PetscReal eps = -1.0; /* dlamch? */ 2968 PetscInt nmin_s; 2969 PetscBool compute_range = PETSC_FALSE; 2970 2971 if (pcbddc->dbg_flag) { 2972 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]]); 2973 } 2974 2975 compute_range = PETSC_FALSE; 2976 if (thresh > 1.+PETSC_SMALL && !same_data) { 2977 compute_range = PETSC_TRUE; 2978 } 2979 2980 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2981 if (compute_range) { 2982 2983 /* ask for eigenvalues larger than thresh */ 2984 #if defined(PETSC_USE_COMPLEX) 2985 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2986 #else 2987 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2988 #endif 2989 } else if (!same_data) { 2990 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2991 B_IL = 1; 2992 #if defined(PETSC_USE_COMPLEX) 2993 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2994 #else 2995 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2996 #endif 2997 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 2998 PetscInt k; 2999 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3000 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3001 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3002 nmin = nmax; 3003 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 3004 for (k=0;k<nmax;k++) { 3005 eigs[k] = 1./PETSC_SMALL; 3006 eigv[k*(subset_size+1)] = 1.0; 3007 } 3008 } 3009 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3010 if (B_ierr) { 3011 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3012 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3013 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3014 } 3015 3016 if (B_neigs > nmax) { 3017 if (pcbddc->dbg_flag) { 3018 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 3019 } 3020 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 3021 B_neigs = nmax; 3022 } 3023 3024 nmin_s = PetscMin(nmin,B_N); 3025 if (B_neigs < nmin_s) { 3026 PetscBLASInt B_neigs2; 3027 3028 if (pcbddc->use_deluxe_scaling) { 3029 B_IL = B_N - nmin_s + 1; 3030 B_IU = B_N - B_neigs; 3031 } else { 3032 B_IL = B_neigs + 1; 3033 B_IU = nmin_s; 3034 } 3035 if (pcbddc->dbg_flag) { 3036 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 3037 } 3038 if (sub_schurs->is_hermitian) { 3039 PetscInt j,k; 3040 for (j=0;j<subset_size;j++) { 3041 for (k=j;k<subset_size;k++) { 3042 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3043 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3044 } 3045 } 3046 } else { 3047 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3048 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3049 } 3050 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3051 #if defined(PETSC_USE_COMPLEX) 3052 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3053 #else 3054 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3055 #endif 3056 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3057 B_neigs += B_neigs2; 3058 } 3059 if (B_ierr) { 3060 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3061 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3062 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3063 } 3064 if (pcbddc->dbg_flag) { 3065 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3066 for (j=0;j<B_neigs;j++) { 3067 if (eigs[j] == 0.0) { 3068 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3069 } else { 3070 if (pcbddc->use_deluxe_scaling) { 3071 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3072 } else { 3073 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3074 } 3075 } 3076 } 3077 } 3078 } else { 3079 /* TODO */ 3080 } 3081 } 3082 /* change the basis back to the original one */ 3083 if (sub_schurs->change) { 3084 Mat change,phi,phit; 3085 3086 if (pcbddc->dbg_flag > 1) { 3087 PetscInt ii; 3088 for (ii=0;ii<B_neigs;ii++) { 3089 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3090 for (j=0;j<B_N;j++) { 3091 #if defined(PETSC_USE_COMPLEX) 3092 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3093 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3094 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3095 #else 3096 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3097 #endif 3098 } 3099 } 3100 } 3101 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3102 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3103 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3104 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3105 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3106 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3107 } 3108 maxneigs = PetscMax(B_neigs,maxneigs); 3109 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3110 if (B_neigs) { 3111 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3112 3113 if (pcbddc->dbg_flag > 1) { 3114 PetscInt ii; 3115 for (ii=0;ii<B_neigs;ii++) { 3116 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3117 for (j=0;j<B_N;j++) { 3118 #if defined(PETSC_USE_COMPLEX) 3119 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3120 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3121 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3122 #else 3123 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3124 #endif 3125 } 3126 } 3127 } 3128 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3129 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3130 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3131 cum++; 3132 } 3133 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3134 /* shift for next computation */ 3135 cumarray += subset_size*subset_size; 3136 } 3137 if (pcbddc->dbg_flag) { 3138 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3139 } 3140 3141 if (mss) { 3142 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3143 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3144 /* destroy matrices (junk) */ 3145 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3146 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3147 } 3148 if (allocated_S_St) { 3149 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3150 } 3151 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3152 #if defined(PETSC_USE_COMPLEX) 3153 ierr = PetscFree(rwork);CHKERRQ(ierr); 3154 #endif 3155 if (pcbddc->dbg_flag) { 3156 PetscInt maxneigs_r; 3157 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3158 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3159 } 3160 PetscFunctionReturn(0); 3161 } 3162 3163 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3164 { 3165 PetscScalar *coarse_submat_vals; 3166 PetscErrorCode ierr; 3167 3168 PetscFunctionBegin; 3169 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3170 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3171 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3172 3173 /* Setup local neumann solver ksp_R */ 3174 /* PCBDDCSetUpLocalScatters should be called first! */ 3175 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3176 3177 /* 3178 Setup local correction and local part of coarse basis. 3179 Gives back the dense local part of the coarse matrix in column major ordering 3180 */ 3181 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3182 3183 /* Compute total number of coarse nodes and setup coarse solver */ 3184 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3185 3186 /* free */ 3187 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3188 PetscFunctionReturn(0); 3189 } 3190 3191 PetscErrorCode PCBDDCResetCustomization(PC pc) 3192 { 3193 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3194 PetscErrorCode ierr; 3195 3196 PetscFunctionBegin; 3197 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3198 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3199 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3200 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3201 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3202 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3203 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3204 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3205 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3206 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3207 PetscFunctionReturn(0); 3208 } 3209 3210 PetscErrorCode PCBDDCResetTopography(PC pc) 3211 { 3212 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3213 PetscInt i; 3214 PetscErrorCode ierr; 3215 3216 PetscFunctionBegin; 3217 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3218 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3219 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3220 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3221 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3222 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3223 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3224 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3225 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3226 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3227 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3228 for (i=0;i<pcbddc->n_local_subs;i++) { 3229 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3230 } 3231 pcbddc->n_local_subs = 0; 3232 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3233 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3234 pcbddc->graphanalyzed = PETSC_FALSE; 3235 pcbddc->recompute_topography = PETSC_TRUE; 3236 PetscFunctionReturn(0); 3237 } 3238 3239 PetscErrorCode PCBDDCResetSolvers(PC pc) 3240 { 3241 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3242 PetscErrorCode ierr; 3243 3244 PetscFunctionBegin; 3245 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3246 if (pcbddc->coarse_phi_B) { 3247 PetscScalar *array; 3248 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3249 ierr = PetscFree(array);CHKERRQ(ierr); 3250 } 3251 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3252 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3253 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3254 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3255 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3256 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3257 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3258 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3259 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3260 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3261 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3262 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3263 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3264 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3265 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3266 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3267 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3268 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3269 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3270 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3271 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3272 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3273 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3274 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3275 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3276 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3277 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3278 if (pcbddc->benign_zerodiag_subs) { 3279 PetscInt i; 3280 for (i=0;i<pcbddc->benign_n;i++) { 3281 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3282 } 3283 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3284 } 3285 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3286 PetscFunctionReturn(0); 3287 } 3288 3289 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3290 { 3291 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3292 PC_IS *pcis = (PC_IS*)pc->data; 3293 VecType impVecType; 3294 PetscInt n_constraints,n_R,old_size; 3295 PetscErrorCode ierr; 3296 3297 PetscFunctionBegin; 3298 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3299 n_R = pcis->n - pcbddc->n_vertices; 3300 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3301 /* local work vectors (try to avoid unneeded work)*/ 3302 /* R nodes */ 3303 old_size = -1; 3304 if (pcbddc->vec1_R) { 3305 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3306 } 3307 if (n_R != old_size) { 3308 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3309 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3310 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3311 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3312 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3313 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3314 } 3315 /* local primal dofs */ 3316 old_size = -1; 3317 if (pcbddc->vec1_P) { 3318 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3319 } 3320 if (pcbddc->local_primal_size != old_size) { 3321 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3322 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3323 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3324 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3325 } 3326 /* local explicit constraints */ 3327 old_size = -1; 3328 if (pcbddc->vec1_C) { 3329 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3330 } 3331 if (n_constraints && n_constraints != old_size) { 3332 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3333 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3334 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3335 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3336 } 3337 PetscFunctionReturn(0); 3338 } 3339 3340 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3341 { 3342 PetscErrorCode ierr; 3343 /* pointers to pcis and pcbddc */ 3344 PC_IS* pcis = (PC_IS*)pc->data; 3345 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3346 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3347 /* submatrices of local problem */ 3348 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3349 /* submatrices of local coarse problem */ 3350 Mat S_VV,S_CV,S_VC,S_CC; 3351 /* working matrices */ 3352 Mat C_CR; 3353 /* additional working stuff */ 3354 PC pc_R; 3355 Mat F; 3356 Vec dummy_vec; 3357 PetscBool isLU,isCHOL,isILU,need_benign_correction; 3358 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3359 PetscScalar *work; 3360 PetscInt *idx_V_B; 3361 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3362 PetscInt i,n_R,n_D,n_B; 3363 3364 /* some shortcuts to scalars */ 3365 PetscScalar one=1.0,m_one=-1.0; 3366 3367 PetscFunctionBegin; 3368 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3369 3370 /* Set Non-overlapping dimensions */ 3371 n_vertices = pcbddc->n_vertices; 3372 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3373 n_B = pcis->n_B; 3374 n_D = pcis->n - n_B; 3375 n_R = pcis->n - n_vertices; 3376 3377 /* vertices in boundary numbering */ 3378 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3379 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3380 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3381 3382 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3383 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3384 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3385 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3386 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3387 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3388 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3389 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3390 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3391 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3392 3393 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3394 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3395 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3396 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3397 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3398 lda_rhs = n_R; 3399 need_benign_correction = PETSC_FALSE; 3400 if (isLU || isILU || isCHOL) { 3401 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3402 } else if (sub_schurs && sub_schurs->reuse_solver) { 3403 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3404 MatFactorType type; 3405 3406 F = reuse_solver->F; 3407 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3408 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3409 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3410 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3411 } else { 3412 F = NULL; 3413 } 3414 3415 /* allocate workspace */ 3416 n = 0; 3417 if (n_constraints) { 3418 n += lda_rhs*n_constraints; 3419 } 3420 if (n_vertices) { 3421 n = PetscMax(2*lda_rhs*n_vertices,n); 3422 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3423 } 3424 if (!pcbddc->symmetric_primal) { 3425 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3426 } 3427 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3428 3429 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3430 dummy_vec = NULL; 3431 if (need_benign_correction && lda_rhs != n_R && F) { 3432 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3433 } 3434 3435 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3436 if (n_constraints) { 3437 Mat M1,M2,M3,C_B; 3438 IS is_aux; 3439 PetscScalar *array,*array2; 3440 3441 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3442 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3443 3444 /* Extract constraints on R nodes: C_{CR} */ 3445 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3446 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3447 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3448 3449 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3450 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3451 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3452 for (i=0;i<n_constraints;i++) { 3453 const PetscScalar *row_cmat_values; 3454 const PetscInt *row_cmat_indices; 3455 PetscInt size_of_constraint,j; 3456 3457 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3458 for (j=0;j<size_of_constraint;j++) { 3459 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3460 } 3461 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3462 } 3463 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3464 if (F) { 3465 Mat B; 3466 3467 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3468 if (need_benign_correction) { 3469 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3470 3471 /* rhs is already zero on interior dofs, no need to change the rhs */ 3472 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3473 } 3474 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 3475 if (need_benign_correction) { 3476 PetscScalar *marr; 3477 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3478 3479 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3480 if (lda_rhs != n_R) { 3481 for (i=0;i<n_constraints;i++) { 3482 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3483 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3484 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3485 } 3486 } else { 3487 for (i=0;i<n_constraints;i++) { 3488 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3489 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3490 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3491 } 3492 } 3493 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3494 } 3495 ierr = MatDestroy(&B);CHKERRQ(ierr); 3496 } else { 3497 PetscScalar *marr; 3498 3499 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3500 for (i=0;i<n_constraints;i++) { 3501 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3502 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3503 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3504 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3505 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3506 } 3507 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3508 } 3509 if (!pcbddc->switch_static) { 3510 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3511 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3512 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3513 for (i=0;i<n_constraints;i++) { 3514 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3515 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3516 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3517 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3518 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3519 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3520 } 3521 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3522 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3523 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3524 } else { 3525 if (lda_rhs != n_R) { 3526 IS dummy; 3527 3528 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3529 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3530 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3531 } else { 3532 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3533 pcbddc->local_auxmat2 = local_auxmat2_R; 3534 } 3535 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3536 } 3537 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3538 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3539 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3540 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3541 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3542 if (isCHOL) { 3543 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3544 } else { 3545 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3546 } 3547 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3548 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3549 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3550 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3551 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3552 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3553 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3554 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3555 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3556 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3557 } 3558 3559 /* Get submatrices from subdomain matrix */ 3560 if (n_vertices) { 3561 IS is_aux; 3562 3563 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3564 IS tis; 3565 3566 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3567 ierr = ISSort(tis);CHKERRQ(ierr); 3568 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3569 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3570 } else { 3571 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3572 } 3573 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3574 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3575 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3576 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3577 } 3578 3579 /* Matrix of coarse basis functions (local) */ 3580 if (pcbddc->coarse_phi_B) { 3581 PetscInt on_B,on_primal,on_D=n_D; 3582 if (pcbddc->coarse_phi_D) { 3583 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3584 } 3585 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3586 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3587 PetscScalar *marray; 3588 3589 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3590 ierr = PetscFree(marray);CHKERRQ(ierr); 3591 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3592 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3593 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3594 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3595 } 3596 } 3597 3598 if (!pcbddc->coarse_phi_B) { 3599 PetscScalar *marr; 3600 3601 /* memory size */ 3602 n = n_B*pcbddc->local_primal_size; 3603 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 3604 if (!pcbddc->symmetric_primal) n *= 2; 3605 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 3606 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3607 marr += n_B*pcbddc->local_primal_size; 3608 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3609 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3610 marr += n_D*pcbddc->local_primal_size; 3611 } 3612 if (!pcbddc->symmetric_primal) { 3613 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3614 marr += n_B*pcbddc->local_primal_size; 3615 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3616 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3617 } 3618 } else { 3619 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3620 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3621 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3622 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3623 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3624 } 3625 } 3626 } 3627 3628 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3629 p0_lidx_I = NULL; 3630 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3631 const PetscInt *idxs; 3632 3633 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3634 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3635 for (i=0;i<pcbddc->benign_n;i++) { 3636 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3637 } 3638 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3639 } 3640 3641 /* vertices */ 3642 if (n_vertices) { 3643 3644 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3645 3646 if (n_R) { 3647 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3648 PetscBLASInt B_N,B_one = 1; 3649 PetscScalar *x,*y; 3650 PetscBool isseqaij; 3651 3652 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3653 if (need_benign_correction) { 3654 ISLocalToGlobalMapping RtoN; 3655 IS is_p0; 3656 PetscInt *idxs_p0,n; 3657 3658 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3659 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3660 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3661 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3662 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3663 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3664 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3665 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3666 } 3667 3668 if (lda_rhs == n_R) { 3669 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3670 } else { 3671 PetscScalar *av,*array; 3672 const PetscInt *xadj,*adjncy; 3673 PetscInt n; 3674 PetscBool flg_row; 3675 3676 array = work+lda_rhs*n_vertices; 3677 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3678 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3679 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3680 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3681 for (i=0;i<n;i++) { 3682 PetscInt j; 3683 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3684 } 3685 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3686 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3687 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3688 } 3689 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3690 if (need_benign_correction) { 3691 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3692 PetscScalar *marr; 3693 3694 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3695 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3696 3697 | 0 0 0 | (V) 3698 L = | 0 0 -1 | (P-p0) 3699 | 0 0 -1 | (p0) 3700 3701 */ 3702 for (i=0;i<reuse_solver->benign_n;i++) { 3703 const PetscScalar *vals; 3704 const PetscInt *idxs,*idxs_zero; 3705 PetscInt n,j,nz; 3706 3707 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3708 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3709 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3710 for (j=0;j<n;j++) { 3711 PetscScalar val = vals[j]; 3712 PetscInt k,col = idxs[j]; 3713 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3714 } 3715 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3716 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3717 } 3718 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3719 } 3720 if (F) { 3721 /* need to correct the rhs */ 3722 if (need_benign_correction) { 3723 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3724 PetscScalar *marr; 3725 3726 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3727 if (lda_rhs != n_R) { 3728 for (i=0;i<n_vertices;i++) { 3729 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3730 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3731 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3732 } 3733 } else { 3734 for (i=0;i<n_vertices;i++) { 3735 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3736 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3737 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3738 } 3739 } 3740 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3741 } 3742 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3743 /* need to correct the solution */ 3744 if (need_benign_correction) { 3745 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3746 PetscScalar *marr; 3747 3748 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3749 if (lda_rhs != n_R) { 3750 for (i=0;i<n_vertices;i++) { 3751 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3752 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3753 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3754 } 3755 } else { 3756 for (i=0;i<n_vertices;i++) { 3757 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3758 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3759 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3760 } 3761 } 3762 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3763 } 3764 } else { 3765 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3766 for (i=0;i<n_vertices;i++) { 3767 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3768 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3769 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3770 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3771 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3772 } 3773 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3774 } 3775 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3776 /* S_VV and S_CV */ 3777 if (n_constraints) { 3778 Mat B; 3779 3780 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3781 for (i=0;i<n_vertices;i++) { 3782 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3783 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3784 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3785 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3786 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3787 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3788 } 3789 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3790 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3791 ierr = MatDestroy(&B);CHKERRQ(ierr); 3792 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3793 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3794 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3795 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3796 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3797 ierr = MatDestroy(&B);CHKERRQ(ierr); 3798 } 3799 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3800 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3801 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3802 } 3803 if (lda_rhs != n_R) { 3804 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3805 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3806 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3807 } 3808 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3809 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3810 if (need_benign_correction) { 3811 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3812 PetscScalar *marr,*sums; 3813 3814 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3815 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 3816 for (i=0;i<reuse_solver->benign_n;i++) { 3817 const PetscScalar *vals; 3818 const PetscInt *idxs,*idxs_zero; 3819 PetscInt n,j,nz; 3820 3821 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3822 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3823 for (j=0;j<n_vertices;j++) { 3824 PetscInt k; 3825 sums[j] = 0.; 3826 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3827 } 3828 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3829 for (j=0;j<n;j++) { 3830 PetscScalar val = vals[j]; 3831 PetscInt k; 3832 for (k=0;k<n_vertices;k++) { 3833 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3834 } 3835 } 3836 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3837 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3838 } 3839 ierr = PetscFree(sums);CHKERRQ(ierr); 3840 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 3841 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3842 } 3843 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3844 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3845 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3846 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3847 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3848 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3849 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3850 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3851 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3852 } else { 3853 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3854 } 3855 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3856 3857 /* coarse basis functions */ 3858 for (i=0;i<n_vertices;i++) { 3859 PetscScalar *y; 3860 3861 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3862 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3863 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3864 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3865 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3866 y[n_B*i+idx_V_B[i]] = 1.0; 3867 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3868 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3869 3870 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3871 PetscInt j; 3872 3873 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3874 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3875 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3876 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3877 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3878 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3879 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3880 } 3881 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3882 } 3883 /* if n_R == 0 the object is not destroyed */ 3884 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3885 } 3886 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3887 3888 if (n_constraints) { 3889 Mat B; 3890 3891 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3892 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3893 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3894 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3895 if (n_vertices) { 3896 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3897 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3898 } else { 3899 Mat S_VCt; 3900 3901 if (lda_rhs != n_R) { 3902 ierr = MatDestroy(&B);CHKERRQ(ierr); 3903 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3904 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3905 } 3906 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3907 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3908 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3909 } 3910 } 3911 ierr = MatDestroy(&B);CHKERRQ(ierr); 3912 /* coarse basis functions */ 3913 for (i=0;i<n_constraints;i++) { 3914 PetscScalar *y; 3915 3916 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3917 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3918 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3919 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3920 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3921 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3922 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3923 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3924 PetscInt j; 3925 3926 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3927 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3928 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3929 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3930 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3931 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3932 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3933 } 3934 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3935 } 3936 } 3937 if (n_constraints) { 3938 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3939 } 3940 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3941 3942 /* coarse matrix entries relative to B_0 */ 3943 if (pcbddc->benign_n) { 3944 Mat B0_B,B0_BPHI; 3945 IS is_dummy; 3946 PetscScalar *data; 3947 PetscInt j; 3948 3949 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3950 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3951 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3952 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3953 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3954 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3955 for (j=0;j<pcbddc->benign_n;j++) { 3956 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3957 for (i=0;i<pcbddc->local_primal_size;i++) { 3958 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3959 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3960 } 3961 } 3962 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3963 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3964 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3965 } 3966 3967 /* compute other basis functions for non-symmetric problems */ 3968 if (!pcbddc->symmetric_primal) { 3969 Mat B_V=NULL,B_C=NULL; 3970 PetscScalar *marray; 3971 3972 if (n_constraints) { 3973 Mat S_CCT,C_CRT; 3974 3975 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 3976 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 3977 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 3978 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 3979 if (n_vertices) { 3980 Mat S_VCT; 3981 3982 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 3983 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 3984 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 3985 } 3986 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 3987 } else { 3988 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 3989 } 3990 if (n_vertices && n_R) { 3991 PetscScalar *av,*marray; 3992 const PetscInt *xadj,*adjncy; 3993 PetscInt n; 3994 PetscBool flg_row; 3995 3996 /* B_V = B_V - A_VR^T */ 3997 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3998 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3999 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4000 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4001 for (i=0;i<n;i++) { 4002 PetscInt j; 4003 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4004 } 4005 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4006 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4007 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4008 } 4009 4010 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4011 if (n_vertices) { 4012 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4013 for (i=0;i<n_vertices;i++) { 4014 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4015 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4016 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4017 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4018 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4019 } 4020 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4021 } 4022 if (B_C) { 4023 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4024 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4025 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4026 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4027 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4028 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4029 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4030 } 4031 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4032 } 4033 /* coarse basis functions */ 4034 for (i=0;i<pcbddc->local_primal_size;i++) { 4035 PetscScalar *y; 4036 4037 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4038 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4039 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4040 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4041 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4042 if (i<n_vertices) { 4043 y[n_B*i+idx_V_B[i]] = 1.0; 4044 } 4045 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4046 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4047 4048 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4049 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4050 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4051 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4052 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4053 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4054 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4055 } 4056 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4057 } 4058 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4059 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4060 } 4061 4062 /* free memory */ 4063 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4064 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4065 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4066 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4067 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4068 ierr = PetscFree(work);CHKERRQ(ierr); 4069 if (n_vertices) { 4070 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4071 } 4072 if (n_constraints) { 4073 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4074 } 4075 /* Checking coarse_sub_mat and coarse basis functios */ 4076 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4077 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4078 if (pcbddc->dbg_flag) { 4079 Mat coarse_sub_mat; 4080 Mat AUXMAT,TM1,TM2,TM3,TM4; 4081 Mat coarse_phi_D,coarse_phi_B; 4082 Mat coarse_psi_D,coarse_psi_B; 4083 Mat A_II,A_BB,A_IB,A_BI; 4084 Mat C_B,CPHI; 4085 IS is_dummy; 4086 Vec mones; 4087 MatType checkmattype=MATSEQAIJ; 4088 PetscReal real_value; 4089 4090 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4091 Mat A; 4092 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4093 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4094 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4095 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4096 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4097 ierr = MatDestroy(&A);CHKERRQ(ierr); 4098 } else { 4099 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4100 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4101 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4102 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4103 } 4104 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4105 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4106 if (!pcbddc->symmetric_primal) { 4107 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4108 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4109 } 4110 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4111 4112 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4113 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4114 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4115 if (!pcbddc->symmetric_primal) { 4116 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4117 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4118 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4119 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4120 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4121 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4122 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4123 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4124 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4125 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4126 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4127 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4128 } else { 4129 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4130 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4131 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4132 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4133 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4134 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4135 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4136 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4137 } 4138 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4139 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4140 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4141 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4142 if (pcbddc->benign_n) { 4143 Mat B0_B,B0_BPHI; 4144 PetscScalar *data,*data2; 4145 PetscInt j; 4146 4147 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4148 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4149 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4150 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4151 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4152 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4153 for (j=0;j<pcbddc->benign_n;j++) { 4154 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4155 for (i=0;i<pcbddc->local_primal_size;i++) { 4156 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4157 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4158 } 4159 } 4160 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4161 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4162 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4163 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4164 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4165 } 4166 #if 0 4167 { 4168 PetscViewer viewer; 4169 char filename[256]; 4170 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4171 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4172 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4173 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4174 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4175 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4176 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4177 if (save_change) { 4178 Mat phi_B; 4179 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 4180 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 4181 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 4182 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 4183 } else { 4184 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4185 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4186 } 4187 if (pcbddc->coarse_phi_D) { 4188 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4189 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4190 } 4191 if (pcbddc->coarse_psi_B) { 4192 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4193 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4194 } 4195 if (pcbddc->coarse_psi_D) { 4196 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4197 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4198 } 4199 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4200 } 4201 #endif 4202 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4203 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4204 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4205 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4206 4207 /* check constraints */ 4208 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4209 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4210 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4211 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4212 } else { 4213 PetscScalar *data; 4214 Mat tmat; 4215 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4216 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4217 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4218 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4219 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4220 } 4221 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4222 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4223 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4224 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4225 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4226 if (!pcbddc->symmetric_primal) { 4227 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4228 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4229 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4230 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4231 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4232 } 4233 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4234 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4235 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4236 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4237 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4238 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4239 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4240 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4241 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4242 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4243 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4244 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4245 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4246 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4247 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4248 if (!pcbddc->symmetric_primal) { 4249 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4250 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4251 } 4252 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4253 } 4254 /* get back data */ 4255 *coarse_submat_vals_n = coarse_submat_vals; 4256 PetscFunctionReturn(0); 4257 } 4258 4259 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4260 { 4261 Mat *work_mat; 4262 IS isrow_s,iscol_s; 4263 PetscBool rsorted,csorted; 4264 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4265 PetscErrorCode ierr; 4266 4267 PetscFunctionBegin; 4268 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4269 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4270 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4271 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4272 4273 if (!rsorted) { 4274 const PetscInt *idxs; 4275 PetscInt *idxs_sorted,i; 4276 4277 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4278 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4279 for (i=0;i<rsize;i++) { 4280 idxs_perm_r[i] = i; 4281 } 4282 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4283 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4284 for (i=0;i<rsize;i++) { 4285 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4286 } 4287 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4288 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4289 } else { 4290 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4291 isrow_s = isrow; 4292 } 4293 4294 if (!csorted) { 4295 if (isrow == iscol) { 4296 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4297 iscol_s = isrow_s; 4298 } else { 4299 const PetscInt *idxs; 4300 PetscInt *idxs_sorted,i; 4301 4302 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4303 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4304 for (i=0;i<csize;i++) { 4305 idxs_perm_c[i] = i; 4306 } 4307 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4308 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4309 for (i=0;i<csize;i++) { 4310 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4311 } 4312 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4313 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4314 } 4315 } else { 4316 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4317 iscol_s = iscol; 4318 } 4319 4320 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4321 4322 if (!rsorted || !csorted) { 4323 Mat new_mat; 4324 IS is_perm_r,is_perm_c; 4325 4326 if (!rsorted) { 4327 PetscInt *idxs_r,i; 4328 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4329 for (i=0;i<rsize;i++) { 4330 idxs_r[idxs_perm_r[i]] = i; 4331 } 4332 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4333 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4334 } else { 4335 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4336 } 4337 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4338 4339 if (!csorted) { 4340 if (isrow_s == iscol_s) { 4341 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4342 is_perm_c = is_perm_r; 4343 } else { 4344 PetscInt *idxs_c,i; 4345 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4346 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4347 for (i=0;i<csize;i++) { 4348 idxs_c[idxs_perm_c[i]] = i; 4349 } 4350 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4351 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4352 } 4353 } else { 4354 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4355 } 4356 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4357 4358 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4359 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4360 work_mat[0] = new_mat; 4361 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4362 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4363 } 4364 4365 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4366 *B = work_mat[0]; 4367 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4368 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4369 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4370 PetscFunctionReturn(0); 4371 } 4372 4373 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4374 { 4375 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4376 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4377 Mat new_mat,lA; 4378 IS is_local,is_global; 4379 PetscInt local_size; 4380 PetscBool isseqaij; 4381 PetscErrorCode ierr; 4382 4383 PetscFunctionBegin; 4384 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4385 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4386 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4387 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4388 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4389 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4390 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4391 4392 /* check */ 4393 if (pcbddc->dbg_flag) { 4394 Vec x,x_change; 4395 PetscReal error; 4396 4397 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4398 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4399 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4400 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4401 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4402 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4403 if (!pcbddc->change_interior) { 4404 const PetscScalar *x,*y,*v; 4405 PetscReal lerror = 0.; 4406 PetscInt i; 4407 4408 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4409 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4410 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4411 for (i=0;i<local_size;i++) 4412 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4413 lerror = PetscAbsScalar(x[i]-y[i]); 4414 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4415 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4416 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4417 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4418 if (error > PETSC_SMALL) { 4419 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4420 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4421 } else { 4422 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4423 } 4424 } 4425 } 4426 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4427 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4428 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4429 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4430 if (error > PETSC_SMALL) { 4431 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4432 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4433 } else { 4434 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4435 } 4436 } 4437 ierr = VecDestroy(&x);CHKERRQ(ierr); 4438 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4439 } 4440 4441 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4442 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 4443 4444 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4445 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4446 if (isseqaij) { 4447 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4448 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4449 if (lA) { 4450 Mat work; 4451 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4452 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4453 ierr = MatDestroy(&work);CHKERRQ(ierr); 4454 } 4455 } else { 4456 Mat work_mat; 4457 4458 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4459 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4460 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4461 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4462 if (lA) { 4463 Mat work; 4464 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4465 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4466 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4467 ierr = MatDestroy(&work);CHKERRQ(ierr); 4468 } 4469 } 4470 if (matis->A->symmetric_set) { 4471 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4472 #if !defined(PETSC_USE_COMPLEX) 4473 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4474 #endif 4475 } 4476 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4477 PetscFunctionReturn(0); 4478 } 4479 4480 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4481 { 4482 PC_IS* pcis = (PC_IS*)(pc->data); 4483 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4484 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4485 PetscInt *idx_R_local=NULL; 4486 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4487 PetscInt vbs,bs; 4488 PetscBT bitmask=NULL; 4489 PetscErrorCode ierr; 4490 4491 PetscFunctionBegin; 4492 /* 4493 No need to setup local scatters if 4494 - primal space is unchanged 4495 AND 4496 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4497 AND 4498 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4499 */ 4500 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4501 PetscFunctionReturn(0); 4502 } 4503 /* destroy old objects */ 4504 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4505 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4506 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4507 /* Set Non-overlapping dimensions */ 4508 n_B = pcis->n_B; 4509 n_D = pcis->n - n_B; 4510 n_vertices = pcbddc->n_vertices; 4511 4512 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4513 4514 /* create auxiliary bitmask and allocate workspace */ 4515 if (!sub_schurs || !sub_schurs->reuse_solver) { 4516 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4517 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4518 for (i=0;i<n_vertices;i++) { 4519 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4520 } 4521 4522 for (i=0, n_R=0; i<pcis->n; i++) { 4523 if (!PetscBTLookup(bitmask,i)) { 4524 idx_R_local[n_R++] = i; 4525 } 4526 } 4527 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4528 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4529 4530 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4531 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4532 } 4533 4534 /* Block code */ 4535 vbs = 1; 4536 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4537 if (bs>1 && !(n_vertices%bs)) { 4538 PetscBool is_blocked = PETSC_TRUE; 4539 PetscInt *vary; 4540 if (!sub_schurs || !sub_schurs->reuse_solver) { 4541 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4542 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4543 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4544 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4545 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4546 for (i=0; i<pcis->n/bs; i++) { 4547 if (vary[i]!=0 && vary[i]!=bs) { 4548 is_blocked = PETSC_FALSE; 4549 break; 4550 } 4551 } 4552 ierr = PetscFree(vary);CHKERRQ(ierr); 4553 } else { 4554 /* Verify directly the R set */ 4555 for (i=0; i<n_R/bs; i++) { 4556 PetscInt j,node=idx_R_local[bs*i]; 4557 for (j=1; j<bs; j++) { 4558 if (node != idx_R_local[bs*i+j]-j) { 4559 is_blocked = PETSC_FALSE; 4560 break; 4561 } 4562 } 4563 } 4564 } 4565 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4566 vbs = bs; 4567 for (i=0;i<n_R/vbs;i++) { 4568 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4569 } 4570 } 4571 } 4572 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4573 if (sub_schurs && sub_schurs->reuse_solver) { 4574 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4575 4576 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4577 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4578 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4579 reuse_solver->is_R = pcbddc->is_R_local; 4580 } else { 4581 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4582 } 4583 4584 /* print some info if requested */ 4585 if (pcbddc->dbg_flag) { 4586 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4587 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4588 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4589 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4590 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4591 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4592 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4593 } 4594 4595 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4596 if (!sub_schurs || !sub_schurs->reuse_solver) { 4597 IS is_aux1,is_aux2; 4598 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4599 4600 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4601 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4602 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4603 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4604 for (i=0; i<n_D; i++) { 4605 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4606 } 4607 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4608 for (i=0, j=0; i<n_R; i++) { 4609 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4610 aux_array1[j++] = i; 4611 } 4612 } 4613 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4614 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4615 for (i=0, j=0; i<n_B; i++) { 4616 if (!PetscBTLookup(bitmask,is_indices[i])) { 4617 aux_array2[j++] = i; 4618 } 4619 } 4620 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4621 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4622 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4623 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4624 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4625 4626 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4627 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4628 for (i=0, j=0; i<n_R; i++) { 4629 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4630 aux_array1[j++] = i; 4631 } 4632 } 4633 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4634 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4635 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4636 } 4637 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4638 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4639 } else { 4640 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4641 IS tis; 4642 PetscInt schur_size; 4643 4644 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4645 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4646 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4647 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4648 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4649 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4650 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4651 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4652 } 4653 } 4654 PetscFunctionReturn(0); 4655 } 4656 4657 4658 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4659 { 4660 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4661 PC_IS *pcis = (PC_IS*)pc->data; 4662 PC pc_temp; 4663 Mat A_RR; 4664 MatReuse reuse; 4665 PetscScalar m_one = -1.0; 4666 PetscReal value; 4667 PetscInt n_D,n_R; 4668 PetscBool check_corr[2],issbaij; 4669 PetscErrorCode ierr; 4670 /* prefixes stuff */ 4671 char dir_prefix[256],neu_prefix[256],str_level[16]; 4672 size_t len; 4673 4674 PetscFunctionBegin; 4675 4676 /* compute prefixes */ 4677 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4678 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4679 if (!pcbddc->current_level) { 4680 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4681 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4682 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4683 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4684 } else { 4685 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4686 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4687 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4688 len -= 15; /* remove "pc_bddc_coarse_" */ 4689 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4690 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4691 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4692 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4693 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4694 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4695 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4696 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4697 } 4698 4699 /* DIRICHLET PROBLEM */ 4700 if (dirichlet) { 4701 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4702 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4703 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4704 if (pcbddc->dbg_flag) { 4705 Mat A_IIn; 4706 4707 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4708 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4709 pcis->A_II = A_IIn; 4710 } 4711 } 4712 if (pcbddc->local_mat->symmetric_set) { 4713 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4714 } 4715 /* Matrix for Dirichlet problem is pcis->A_II */ 4716 n_D = pcis->n - pcis->n_B; 4717 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4718 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4719 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4720 /* default */ 4721 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4722 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4723 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4724 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4725 if (issbaij) { 4726 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4727 } else { 4728 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4729 } 4730 /* Allow user's customization */ 4731 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4732 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4733 } 4734 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4735 if (sub_schurs && sub_schurs->reuse_solver) { 4736 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4737 4738 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4739 } 4740 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4741 if (!n_D) { 4742 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4743 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4744 } 4745 /* Set Up KSP for Dirichlet problem of BDDC */ 4746 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4747 /* set ksp_D into pcis data */ 4748 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4749 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4750 pcis->ksp_D = pcbddc->ksp_D; 4751 } 4752 4753 /* NEUMANN PROBLEM */ 4754 A_RR = 0; 4755 if (neumann) { 4756 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4757 PetscInt ibs,mbs; 4758 PetscBool issbaij; 4759 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4760 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4761 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4762 if (pcbddc->ksp_R) { /* already created ksp */ 4763 PetscInt nn_R; 4764 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4765 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4766 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4767 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4768 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4769 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4770 reuse = MAT_INITIAL_MATRIX; 4771 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4772 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4773 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4774 reuse = MAT_INITIAL_MATRIX; 4775 } else { /* safe to reuse the matrix */ 4776 reuse = MAT_REUSE_MATRIX; 4777 } 4778 } 4779 /* last check */ 4780 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4781 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4782 reuse = MAT_INITIAL_MATRIX; 4783 } 4784 } else { /* first time, so we need to create the matrix */ 4785 reuse = MAT_INITIAL_MATRIX; 4786 } 4787 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4788 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4789 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4790 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4791 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4792 if (matis->A == pcbddc->local_mat) { 4793 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4794 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4795 } else { 4796 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4797 } 4798 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4799 if (matis->A == pcbddc->local_mat) { 4800 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4801 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4802 } else { 4803 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4804 } 4805 } 4806 /* extract A_RR */ 4807 if (sub_schurs && sub_schurs->reuse_solver) { 4808 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4809 4810 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4811 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4812 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4813 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4814 } else { 4815 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4816 } 4817 } else { 4818 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4819 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4820 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4821 } 4822 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4823 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4824 } 4825 if (pcbddc->local_mat->symmetric_set) { 4826 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4827 } 4828 if (!pcbddc->ksp_R) { /* create object if not present */ 4829 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4830 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4831 /* default */ 4832 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4833 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4834 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4835 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4836 if (issbaij) { 4837 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4838 } else { 4839 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4840 } 4841 /* Allow user's customization */ 4842 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4843 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4844 } 4845 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4846 if (!n_R) { 4847 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4848 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4849 } 4850 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4851 /* Reuse solver if it is present */ 4852 if (sub_schurs && sub_schurs->reuse_solver && sub_schurs->A == pcbddc->local_mat) { 4853 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4854 4855 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4856 } 4857 /* Set Up KSP for Neumann problem of BDDC */ 4858 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4859 } 4860 4861 if (pcbddc->dbg_flag) { 4862 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4863 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4864 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4865 } 4866 4867 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4868 check_corr[0] = check_corr[1] = PETSC_FALSE; 4869 if (pcbddc->NullSpace_corr[0]) { 4870 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4871 } 4872 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4873 check_corr[0] = PETSC_TRUE; 4874 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4875 } 4876 if (neumann && pcbddc->NullSpace_corr[2]) { 4877 check_corr[1] = PETSC_TRUE; 4878 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4879 } 4880 4881 /* check Dirichlet and Neumann solvers */ 4882 if (pcbddc->dbg_flag) { 4883 if (dirichlet) { /* Dirichlet */ 4884 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4885 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4886 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4887 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4888 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4889 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4890 if (check_corr[0]) { 4891 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4892 } 4893 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4894 } 4895 if (neumann) { /* Neumann */ 4896 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4897 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4898 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4899 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4900 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4901 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4902 if (check_corr[1]) { 4903 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4904 } 4905 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4906 } 4907 } 4908 /* free Neumann problem's matrix */ 4909 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4910 PetscFunctionReturn(0); 4911 } 4912 4913 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4914 { 4915 PetscErrorCode ierr; 4916 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4917 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4918 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4919 4920 PetscFunctionBegin; 4921 if (!reuse_solver) { 4922 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4923 } 4924 if (!pcbddc->switch_static) { 4925 if (applytranspose && pcbddc->local_auxmat1) { 4926 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4927 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4928 } 4929 if (!reuse_solver) { 4930 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4931 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4932 } else { 4933 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4934 4935 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4936 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4937 } 4938 } else { 4939 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4940 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4941 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4942 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4943 if (applytranspose && pcbddc->local_auxmat1) { 4944 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4945 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4946 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4947 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4948 } 4949 } 4950 if (!reuse_solver || pcbddc->switch_static) { 4951 if (applytranspose) { 4952 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4953 } else { 4954 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4955 } 4956 } else { 4957 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4958 4959 if (applytranspose) { 4960 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4961 } else { 4962 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4963 } 4964 } 4965 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4966 if (!pcbddc->switch_static) { 4967 if (!reuse_solver) { 4968 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4969 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4970 } else { 4971 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4972 4973 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4974 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4975 } 4976 if (!applytranspose && pcbddc->local_auxmat1) { 4977 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4978 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4979 } 4980 } else { 4981 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4982 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4983 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4984 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4985 if (!applytranspose && pcbddc->local_auxmat1) { 4986 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4987 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4988 } 4989 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4990 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4991 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4992 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4993 } 4994 PetscFunctionReturn(0); 4995 } 4996 4997 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 4998 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 4999 { 5000 PetscErrorCode ierr; 5001 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5002 PC_IS* pcis = (PC_IS*) (pc->data); 5003 const PetscScalar zero = 0.0; 5004 5005 PetscFunctionBegin; 5006 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5007 if (!pcbddc->benign_apply_coarse_only) { 5008 if (applytranspose) { 5009 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5010 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5011 } else { 5012 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5013 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5014 } 5015 } else { 5016 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5017 } 5018 5019 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5020 if (pcbddc->benign_n) { 5021 PetscScalar *array; 5022 PetscInt j; 5023 5024 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5025 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5026 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5027 } 5028 5029 /* start communications from local primal nodes to rhs of coarse solver */ 5030 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5031 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5032 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5033 5034 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5035 if (pcbddc->coarse_ksp) { 5036 Mat coarse_mat; 5037 Vec rhs,sol; 5038 MatNullSpace nullsp; 5039 PetscBool isbddc = PETSC_FALSE; 5040 5041 if (pcbddc->benign_have_null) { 5042 PC coarse_pc; 5043 5044 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5045 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5046 /* we need to propagate to coarser levels the need for a possible benign correction */ 5047 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5048 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5049 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5050 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5051 } 5052 } 5053 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5054 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5055 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5056 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5057 if (nullsp) { 5058 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5059 } 5060 if (applytranspose) { 5061 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5062 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5063 } else { 5064 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5065 PC coarse_pc; 5066 5067 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5068 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5069 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5070 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5071 } else { 5072 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5073 } 5074 } 5075 /* we don't need the benign correction at coarser levels anymore */ 5076 if (pcbddc->benign_have_null && isbddc) { 5077 PC coarse_pc; 5078 PC_BDDC* coarsepcbddc; 5079 5080 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5081 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5082 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5083 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5084 } 5085 if (nullsp) { 5086 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5087 } 5088 } 5089 5090 /* Local solution on R nodes */ 5091 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5092 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5093 } 5094 /* communications from coarse sol to local primal nodes */ 5095 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5096 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5097 5098 /* Sum contributions from the two levels */ 5099 if (!pcbddc->benign_apply_coarse_only) { 5100 if (applytranspose) { 5101 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5102 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5103 } else { 5104 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5105 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5106 } 5107 /* store p0 */ 5108 if (pcbddc->benign_n) { 5109 PetscScalar *array; 5110 PetscInt j; 5111 5112 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5113 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5114 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5115 } 5116 } else { /* expand the coarse solution */ 5117 if (applytranspose) { 5118 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5119 } else { 5120 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5121 } 5122 } 5123 PetscFunctionReturn(0); 5124 } 5125 5126 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5127 { 5128 PetscErrorCode ierr; 5129 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5130 PetscScalar *array; 5131 Vec from,to; 5132 5133 PetscFunctionBegin; 5134 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5135 from = pcbddc->coarse_vec; 5136 to = pcbddc->vec1_P; 5137 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5138 Vec tvec; 5139 5140 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5141 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5142 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5143 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5144 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5145 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5146 } 5147 } else { /* from local to global -> put data in coarse right hand side */ 5148 from = pcbddc->vec1_P; 5149 to = pcbddc->coarse_vec; 5150 } 5151 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5152 PetscFunctionReturn(0); 5153 } 5154 5155 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5156 { 5157 PetscErrorCode ierr; 5158 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5159 PetscScalar *array; 5160 Vec from,to; 5161 5162 PetscFunctionBegin; 5163 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5164 from = pcbddc->coarse_vec; 5165 to = pcbddc->vec1_P; 5166 } else { /* from local to global -> put data in coarse right hand side */ 5167 from = pcbddc->vec1_P; 5168 to = pcbddc->coarse_vec; 5169 } 5170 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5171 if (smode == SCATTER_FORWARD) { 5172 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5173 Vec tvec; 5174 5175 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5176 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5177 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5178 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5179 } 5180 } else { 5181 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5182 ierr = VecResetArray(from);CHKERRQ(ierr); 5183 } 5184 } 5185 PetscFunctionReturn(0); 5186 } 5187 5188 /* uncomment for testing purposes */ 5189 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5190 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5191 { 5192 PetscErrorCode ierr; 5193 PC_IS* pcis = (PC_IS*)(pc->data); 5194 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5195 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5196 /* one and zero */ 5197 PetscScalar one=1.0,zero=0.0; 5198 /* space to store constraints and their local indices */ 5199 PetscScalar *constraints_data; 5200 PetscInt *constraints_idxs,*constraints_idxs_B; 5201 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5202 PetscInt *constraints_n; 5203 /* iterators */ 5204 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5205 /* BLAS integers */ 5206 PetscBLASInt lwork,lierr; 5207 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5208 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5209 /* reuse */ 5210 PetscInt olocal_primal_size,olocal_primal_size_cc; 5211 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5212 /* change of basis */ 5213 PetscBool qr_needed; 5214 PetscBT change_basis,qr_needed_idx; 5215 /* auxiliary stuff */ 5216 PetscInt *nnz,*is_indices; 5217 PetscInt ncc; 5218 /* some quantities */ 5219 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5220 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5221 5222 PetscFunctionBegin; 5223 /* Destroy Mat objects computed previously */ 5224 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5225 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5226 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5227 /* save info on constraints from previous setup (if any) */ 5228 olocal_primal_size = pcbddc->local_primal_size; 5229 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5230 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5231 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5232 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5233 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5234 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5235 5236 if (!pcbddc->adaptive_selection) { 5237 IS ISForVertices,*ISForFaces,*ISForEdges; 5238 MatNullSpace nearnullsp; 5239 const Vec *nearnullvecs; 5240 Vec *localnearnullsp; 5241 PetscScalar *array; 5242 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5243 PetscBool nnsp_has_cnst; 5244 /* LAPACK working arrays for SVD or POD */ 5245 PetscBool skip_lapack,boolforchange; 5246 PetscScalar *work; 5247 PetscReal *singular_vals; 5248 #if defined(PETSC_USE_COMPLEX) 5249 PetscReal *rwork; 5250 #endif 5251 #if defined(PETSC_MISSING_LAPACK_GESVD) 5252 PetscScalar *temp_basis,*correlation_mat; 5253 #else 5254 PetscBLASInt dummy_int=1; 5255 PetscScalar dummy_scalar=1.; 5256 #endif 5257 5258 /* Get index sets for faces, edges and vertices from graph */ 5259 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5260 /* print some info */ 5261 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5262 PetscInt nv; 5263 5264 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5265 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5266 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5267 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5268 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5269 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5270 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5271 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5272 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5273 } 5274 5275 /* free unneeded index sets */ 5276 if (!pcbddc->use_vertices) { 5277 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5278 } 5279 if (!pcbddc->use_edges) { 5280 for (i=0;i<n_ISForEdges;i++) { 5281 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5282 } 5283 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5284 n_ISForEdges = 0; 5285 } 5286 if (!pcbddc->use_faces) { 5287 for (i=0;i<n_ISForFaces;i++) { 5288 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5289 } 5290 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5291 n_ISForFaces = 0; 5292 } 5293 5294 /* check if near null space is attached to global mat */ 5295 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5296 if (nearnullsp) { 5297 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5298 /* remove any stored info */ 5299 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5300 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5301 /* store information for BDDC solver reuse */ 5302 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5303 pcbddc->onearnullspace = nearnullsp; 5304 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5305 for (i=0;i<nnsp_size;i++) { 5306 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5307 } 5308 } else { /* if near null space is not provided BDDC uses constants by default */ 5309 nnsp_size = 0; 5310 nnsp_has_cnst = PETSC_TRUE; 5311 } 5312 /* get max number of constraints on a single cc */ 5313 max_constraints = nnsp_size; 5314 if (nnsp_has_cnst) max_constraints++; 5315 5316 /* 5317 Evaluate maximum storage size needed by the procedure 5318 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5319 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5320 There can be multiple constraints per connected component 5321 */ 5322 n_vertices = 0; 5323 if (ISForVertices) { 5324 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5325 } 5326 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5327 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5328 5329 total_counts = n_ISForFaces+n_ISForEdges; 5330 total_counts *= max_constraints; 5331 total_counts += n_vertices; 5332 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5333 5334 total_counts = 0; 5335 max_size_of_constraint = 0; 5336 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5337 IS used_is; 5338 if (i<n_ISForEdges) { 5339 used_is = ISForEdges[i]; 5340 } else { 5341 used_is = ISForFaces[i-n_ISForEdges]; 5342 } 5343 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5344 total_counts += j; 5345 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5346 } 5347 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5348 5349 /* get local part of global near null space vectors */ 5350 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5351 for (k=0;k<nnsp_size;k++) { 5352 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5353 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5354 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5355 } 5356 5357 /* whether or not to skip lapack calls */ 5358 skip_lapack = PETSC_TRUE; 5359 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5360 5361 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5362 if (!skip_lapack) { 5363 PetscScalar temp_work; 5364 5365 #if defined(PETSC_MISSING_LAPACK_GESVD) 5366 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5367 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5368 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5369 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5370 #if defined(PETSC_USE_COMPLEX) 5371 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5372 #endif 5373 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5374 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5375 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5376 lwork = -1; 5377 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5378 #if !defined(PETSC_USE_COMPLEX) 5379 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5380 #else 5381 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5382 #endif 5383 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5384 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5385 #else /* on missing GESVD */ 5386 /* SVD */ 5387 PetscInt max_n,min_n; 5388 max_n = max_size_of_constraint; 5389 min_n = max_constraints; 5390 if (max_size_of_constraint < max_constraints) { 5391 min_n = max_size_of_constraint; 5392 max_n = max_constraints; 5393 } 5394 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5395 #if defined(PETSC_USE_COMPLEX) 5396 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5397 #endif 5398 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5399 lwork = -1; 5400 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5401 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5402 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5403 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5404 #if !defined(PETSC_USE_COMPLEX) 5405 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5406 #else 5407 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5408 #endif 5409 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5410 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5411 #endif /* on missing GESVD */ 5412 /* Allocate optimal workspace */ 5413 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5414 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5415 } 5416 /* Now we can loop on constraining sets */ 5417 total_counts = 0; 5418 constraints_idxs_ptr[0] = 0; 5419 constraints_data_ptr[0] = 0; 5420 /* vertices */ 5421 if (n_vertices) { 5422 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5423 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5424 for (i=0;i<n_vertices;i++) { 5425 constraints_n[total_counts] = 1; 5426 constraints_data[total_counts] = 1.0; 5427 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5428 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5429 total_counts++; 5430 } 5431 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5432 n_vertices = total_counts; 5433 } 5434 5435 /* edges and faces */ 5436 total_counts_cc = total_counts; 5437 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5438 IS used_is; 5439 PetscBool idxs_copied = PETSC_FALSE; 5440 5441 if (ncc<n_ISForEdges) { 5442 used_is = ISForEdges[ncc]; 5443 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5444 } else { 5445 used_is = ISForFaces[ncc-n_ISForEdges]; 5446 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5447 } 5448 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5449 5450 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5451 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5452 /* change of basis should not be performed on local periodic nodes */ 5453 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5454 if (nnsp_has_cnst) { 5455 PetscScalar quad_value; 5456 5457 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5458 idxs_copied = PETSC_TRUE; 5459 5460 if (!pcbddc->use_nnsp_true) { 5461 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5462 } else { 5463 quad_value = 1.0; 5464 } 5465 for (j=0;j<size_of_constraint;j++) { 5466 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5467 } 5468 temp_constraints++; 5469 total_counts++; 5470 } 5471 for (k=0;k<nnsp_size;k++) { 5472 PetscReal real_value; 5473 PetscScalar *ptr_to_data; 5474 5475 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5476 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5477 for (j=0;j<size_of_constraint;j++) { 5478 ptr_to_data[j] = array[is_indices[j]]; 5479 } 5480 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5481 /* check if array is null on the connected component */ 5482 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5483 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5484 if (real_value > 0.0) { /* keep indices and values */ 5485 temp_constraints++; 5486 total_counts++; 5487 if (!idxs_copied) { 5488 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5489 idxs_copied = PETSC_TRUE; 5490 } 5491 } 5492 } 5493 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5494 valid_constraints = temp_constraints; 5495 if (!pcbddc->use_nnsp_true && temp_constraints) { 5496 if (temp_constraints == 1) { /* just normalize the constraint */ 5497 PetscScalar norm,*ptr_to_data; 5498 5499 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5500 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5501 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5502 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5503 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5504 } else { /* perform SVD */ 5505 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5506 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5507 5508 #if defined(PETSC_MISSING_LAPACK_GESVD) 5509 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5510 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5511 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5512 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5513 from that computed using LAPACKgesvd 5514 -> This is due to a different computation of eigenvectors in LAPACKheev 5515 -> The quality of the POD-computed basis will be the same */ 5516 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5517 /* Store upper triangular part of correlation matrix */ 5518 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5519 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5520 for (j=0;j<temp_constraints;j++) { 5521 for (k=0;k<j+1;k++) { 5522 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5523 } 5524 } 5525 /* compute eigenvalues and eigenvectors of correlation matrix */ 5526 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5527 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5528 #if !defined(PETSC_USE_COMPLEX) 5529 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5530 #else 5531 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5532 #endif 5533 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5534 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5535 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5536 j = 0; 5537 while (j < temp_constraints && singular_vals[j] < tol) j++; 5538 total_counts = total_counts-j; 5539 valid_constraints = temp_constraints-j; 5540 /* scale and copy POD basis into used quadrature memory */ 5541 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5542 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5543 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5544 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5545 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5546 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5547 if (j<temp_constraints) { 5548 PetscInt ii; 5549 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5550 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5551 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5552 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5553 for (k=0;k<temp_constraints-j;k++) { 5554 for (ii=0;ii<size_of_constraint;ii++) { 5555 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5556 } 5557 } 5558 } 5559 #else /* on missing GESVD */ 5560 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5561 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5562 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5563 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5564 #if !defined(PETSC_USE_COMPLEX) 5565 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5566 #else 5567 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5568 #endif 5569 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5570 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5571 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5572 k = temp_constraints; 5573 if (k > size_of_constraint) k = size_of_constraint; 5574 j = 0; 5575 while (j < k && singular_vals[k-j-1] < tol) j++; 5576 valid_constraints = k-j; 5577 total_counts = total_counts-temp_constraints+valid_constraints; 5578 #endif /* on missing GESVD */ 5579 } 5580 } 5581 /* update pointers information */ 5582 if (valid_constraints) { 5583 constraints_n[total_counts_cc] = valid_constraints; 5584 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5585 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5586 /* set change_of_basis flag */ 5587 if (boolforchange) { 5588 PetscBTSet(change_basis,total_counts_cc); 5589 } 5590 total_counts_cc++; 5591 } 5592 } 5593 /* free workspace */ 5594 if (!skip_lapack) { 5595 ierr = PetscFree(work);CHKERRQ(ierr); 5596 #if defined(PETSC_USE_COMPLEX) 5597 ierr = PetscFree(rwork);CHKERRQ(ierr); 5598 #endif 5599 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5600 #if defined(PETSC_MISSING_LAPACK_GESVD) 5601 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5602 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5603 #endif 5604 } 5605 for (k=0;k<nnsp_size;k++) { 5606 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5607 } 5608 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5609 /* free index sets of faces, edges and vertices */ 5610 for (i=0;i<n_ISForFaces;i++) { 5611 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5612 } 5613 if (n_ISForFaces) { 5614 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5615 } 5616 for (i=0;i<n_ISForEdges;i++) { 5617 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5618 } 5619 if (n_ISForEdges) { 5620 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5621 } 5622 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5623 } else { 5624 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5625 5626 total_counts = 0; 5627 n_vertices = 0; 5628 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5629 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5630 } 5631 max_constraints = 0; 5632 total_counts_cc = 0; 5633 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5634 total_counts += pcbddc->adaptive_constraints_n[i]; 5635 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5636 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5637 } 5638 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5639 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5640 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5641 constraints_data = pcbddc->adaptive_constraints_data; 5642 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5643 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5644 total_counts_cc = 0; 5645 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5646 if (pcbddc->adaptive_constraints_n[i]) { 5647 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5648 } 5649 } 5650 #if 0 5651 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5652 for (i=0;i<total_counts_cc;i++) { 5653 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5654 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5655 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5656 printf(" %d",constraints_idxs[j]); 5657 } 5658 printf("\n"); 5659 printf("number of cc: %d\n",constraints_n[i]); 5660 } 5661 for (i=0;i<n_vertices;i++) { 5662 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5663 } 5664 for (i=0;i<sub_schurs->n_subs;i++) { 5665 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5666 } 5667 #endif 5668 5669 max_size_of_constraint = 0; 5670 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5671 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5672 /* Change of basis */ 5673 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5674 if (pcbddc->use_change_of_basis) { 5675 for (i=0;i<sub_schurs->n_subs;i++) { 5676 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5677 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5678 } 5679 } 5680 } 5681 } 5682 pcbddc->local_primal_size = total_counts; 5683 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5684 5685 /* map constraints_idxs in boundary numbering */ 5686 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5687 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5688 5689 /* Create constraint matrix */ 5690 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5691 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5692 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5693 5694 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5695 /* determine if a QR strategy is needed for change of basis */ 5696 qr_needed = PETSC_FALSE; 5697 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5698 total_primal_vertices=0; 5699 pcbddc->local_primal_size_cc = 0; 5700 for (i=0;i<total_counts_cc;i++) { 5701 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5702 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5703 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5704 pcbddc->local_primal_size_cc += 1; 5705 } else if (PetscBTLookup(change_basis,i)) { 5706 for (k=0;k<constraints_n[i];k++) { 5707 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5708 } 5709 pcbddc->local_primal_size_cc += constraints_n[i]; 5710 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5711 PetscBTSet(qr_needed_idx,i); 5712 qr_needed = PETSC_TRUE; 5713 } 5714 } else { 5715 pcbddc->local_primal_size_cc += 1; 5716 } 5717 } 5718 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5719 pcbddc->n_vertices = total_primal_vertices; 5720 /* permute indices in order to have a sorted set of vertices */ 5721 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5722 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5723 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5724 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5725 5726 /* nonzero structure of constraint matrix */ 5727 /* and get reference dof for local constraints */ 5728 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5729 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5730 5731 j = total_primal_vertices; 5732 total_counts = total_primal_vertices; 5733 cum = total_primal_vertices; 5734 for (i=n_vertices;i<total_counts_cc;i++) { 5735 if (!PetscBTLookup(change_basis,i)) { 5736 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5737 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5738 cum++; 5739 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5740 for (k=0;k<constraints_n[i];k++) { 5741 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5742 nnz[j+k] = size_of_constraint; 5743 } 5744 j += constraints_n[i]; 5745 } 5746 } 5747 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5748 ierr = PetscFree(nnz);CHKERRQ(ierr); 5749 5750 /* set values in constraint matrix */ 5751 for (i=0;i<total_primal_vertices;i++) { 5752 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5753 } 5754 total_counts = total_primal_vertices; 5755 for (i=n_vertices;i<total_counts_cc;i++) { 5756 if (!PetscBTLookup(change_basis,i)) { 5757 PetscInt *cols; 5758 5759 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5760 cols = constraints_idxs+constraints_idxs_ptr[i]; 5761 for (k=0;k<constraints_n[i];k++) { 5762 PetscInt row = total_counts+k; 5763 PetscScalar *vals; 5764 5765 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5766 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5767 } 5768 total_counts += constraints_n[i]; 5769 } 5770 } 5771 /* assembling */ 5772 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5773 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5774 5775 /* 5776 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5777 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5778 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5779 */ 5780 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5781 if (pcbddc->use_change_of_basis) { 5782 /* dual and primal dofs on a single cc */ 5783 PetscInt dual_dofs,primal_dofs; 5784 /* working stuff for GEQRF */ 5785 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5786 PetscBLASInt lqr_work; 5787 /* working stuff for UNGQR */ 5788 PetscScalar *gqr_work,lgqr_work_t; 5789 PetscBLASInt lgqr_work; 5790 /* working stuff for TRTRS */ 5791 PetscScalar *trs_rhs; 5792 PetscBLASInt Blas_NRHS; 5793 /* pointers for values insertion into change of basis matrix */ 5794 PetscInt *start_rows,*start_cols; 5795 PetscScalar *start_vals; 5796 /* working stuff for values insertion */ 5797 PetscBT is_primal; 5798 PetscInt *aux_primal_numbering_B; 5799 /* matrix sizes */ 5800 PetscInt global_size,local_size; 5801 /* temporary change of basis */ 5802 Mat localChangeOfBasisMatrix; 5803 /* extra space for debugging */ 5804 PetscScalar *dbg_work; 5805 5806 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5807 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5808 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5809 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5810 /* nonzeros for local mat */ 5811 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5812 if (!pcbddc->benign_change || pcbddc->fake_change) { 5813 for (i=0;i<pcis->n;i++) nnz[i]=1; 5814 } else { 5815 const PetscInt *ii; 5816 PetscInt n; 5817 PetscBool flg_row; 5818 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5819 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5820 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5821 } 5822 for (i=n_vertices;i<total_counts_cc;i++) { 5823 if (PetscBTLookup(change_basis,i)) { 5824 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5825 if (PetscBTLookup(qr_needed_idx,i)) { 5826 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5827 } else { 5828 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5829 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5830 } 5831 } 5832 } 5833 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5834 ierr = PetscFree(nnz);CHKERRQ(ierr); 5835 /* Set interior change in the matrix */ 5836 if (!pcbddc->benign_change || pcbddc->fake_change) { 5837 for (i=0;i<pcis->n;i++) { 5838 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5839 } 5840 } else { 5841 const PetscInt *ii,*jj; 5842 PetscScalar *aa; 5843 PetscInt n; 5844 PetscBool flg_row; 5845 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5846 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5847 for (i=0;i<n;i++) { 5848 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5849 } 5850 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5851 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5852 } 5853 5854 if (pcbddc->dbg_flag) { 5855 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5856 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5857 } 5858 5859 5860 /* Now we loop on the constraints which need a change of basis */ 5861 /* 5862 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5863 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5864 5865 Basic blocks of change of basis matrix T computed by 5866 5867 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5868 5869 | 1 0 ... 0 s_1/S | 5870 | 0 1 ... 0 s_2/S | 5871 | ... | 5872 | 0 ... 1 s_{n-1}/S | 5873 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5874 5875 with S = \sum_{i=1}^n s_i^2 5876 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5877 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5878 5879 - QR decomposition of constraints otherwise 5880 */ 5881 if (qr_needed) { 5882 /* space to store Q */ 5883 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5884 /* array to store scaling factors for reflectors */ 5885 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5886 /* first we issue queries for optimal work */ 5887 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5888 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5889 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5890 lqr_work = -1; 5891 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5892 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5893 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5894 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5895 lgqr_work = -1; 5896 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5897 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5898 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5899 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5900 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5901 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5902 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5903 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5904 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5905 /* array to store rhs and solution of triangular solver */ 5906 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5907 /* allocating workspace for check */ 5908 if (pcbddc->dbg_flag) { 5909 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5910 } 5911 } 5912 /* array to store whether a node is primal or not */ 5913 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5914 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5915 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5916 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5917 for (i=0;i<total_primal_vertices;i++) { 5918 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5919 } 5920 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5921 5922 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5923 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5924 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5925 if (PetscBTLookup(change_basis,total_counts)) { 5926 /* get constraint info */ 5927 primal_dofs = constraints_n[total_counts]; 5928 dual_dofs = size_of_constraint-primal_dofs; 5929 5930 if (pcbddc->dbg_flag) { 5931 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5932 } 5933 5934 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5935 5936 /* copy quadrature constraints for change of basis check */ 5937 if (pcbddc->dbg_flag) { 5938 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5939 } 5940 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5941 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5942 5943 /* compute QR decomposition of constraints */ 5944 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5945 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5946 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5947 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5948 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5949 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5950 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5951 5952 /* explictly compute R^-T */ 5953 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5954 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5955 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5956 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5957 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5958 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5959 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5960 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5961 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5962 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5963 5964 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5965 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5966 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5967 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5968 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5969 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5970 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 5971 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 5972 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5973 5974 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 5975 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 5976 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 5977 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5978 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5979 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5980 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5981 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5982 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5983 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5984 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 5985 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5986 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5987 5988 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 5989 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 5990 /* insert cols for primal dofs */ 5991 for (j=0;j<primal_dofs;j++) { 5992 start_vals = &qr_basis[j*size_of_constraint]; 5993 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5994 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5995 } 5996 /* insert cols for dual dofs */ 5997 for (j=0,k=0;j<dual_dofs;k++) { 5998 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 5999 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6000 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6001 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6002 j++; 6003 } 6004 } 6005 6006 /* check change of basis */ 6007 if (pcbddc->dbg_flag) { 6008 PetscInt ii,jj; 6009 PetscBool valid_qr=PETSC_TRUE; 6010 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6011 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6012 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6013 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6014 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6015 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6016 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6017 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6018 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6019 for (jj=0;jj<size_of_constraint;jj++) { 6020 for (ii=0;ii<primal_dofs;ii++) { 6021 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6022 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 6023 } 6024 } 6025 if (!valid_qr) { 6026 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6027 for (jj=0;jj<size_of_constraint;jj++) { 6028 for (ii=0;ii<primal_dofs;ii++) { 6029 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6030 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6031 } 6032 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 6033 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6034 } 6035 } 6036 } 6037 } else { 6038 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6039 } 6040 } 6041 } else { /* simple transformation block */ 6042 PetscInt row,col; 6043 PetscScalar val,norm; 6044 6045 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6046 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6047 for (j=0;j<size_of_constraint;j++) { 6048 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6049 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6050 if (!PetscBTLookup(is_primal,row_B)) { 6051 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6052 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6053 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6054 } else { 6055 for (k=0;k<size_of_constraint;k++) { 6056 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6057 if (row != col) { 6058 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6059 } else { 6060 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6061 } 6062 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6063 } 6064 } 6065 } 6066 if (pcbddc->dbg_flag) { 6067 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6068 } 6069 } 6070 } else { 6071 if (pcbddc->dbg_flag) { 6072 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6073 } 6074 } 6075 } 6076 6077 /* free workspace */ 6078 if (qr_needed) { 6079 if (pcbddc->dbg_flag) { 6080 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6081 } 6082 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6083 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6084 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6085 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6086 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6087 } 6088 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6089 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6090 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6091 6092 /* assembling of global change of variable */ 6093 if (!pcbddc->fake_change) { 6094 Mat tmat; 6095 PetscInt bs; 6096 6097 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6098 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6099 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6100 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6101 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6102 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6103 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6104 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6105 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6106 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6107 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6108 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6109 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6110 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6111 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6112 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6113 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6114 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6115 6116 /* check */ 6117 if (pcbddc->dbg_flag) { 6118 PetscReal error; 6119 Vec x,x_change; 6120 6121 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6122 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6123 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6124 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6125 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6126 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6127 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6128 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6129 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6130 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6131 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6132 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6133 if (error > PETSC_SMALL) { 6134 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6135 } 6136 ierr = VecDestroy(&x);CHKERRQ(ierr); 6137 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6138 } 6139 /* adapt sub_schurs computed (if any) */ 6140 if (pcbddc->use_deluxe_scaling) { 6141 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6142 6143 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 6144 if (sub_schurs && sub_schurs->S_Ej_all) { 6145 Mat S_new,tmat; 6146 IS is_all_N,is_V_Sall = NULL; 6147 6148 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6149 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6150 if (pcbddc->deluxe_zerorows) { 6151 ISLocalToGlobalMapping NtoSall; 6152 IS is_V; 6153 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6154 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6155 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6156 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6157 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6158 } 6159 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6160 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6161 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6162 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6163 if (pcbddc->deluxe_zerorows) { 6164 const PetscScalar *array; 6165 const PetscInt *idxs_V,*idxs_all; 6166 PetscInt i,n_V; 6167 6168 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6169 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6170 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6171 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6172 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6173 for (i=0;i<n_V;i++) { 6174 PetscScalar val; 6175 PetscInt idx; 6176 6177 idx = idxs_V[i]; 6178 val = array[idxs_all[idxs_V[i]]]; 6179 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6180 } 6181 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6182 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6183 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6184 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6185 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6186 } 6187 sub_schurs->S_Ej_all = S_new; 6188 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6189 if (sub_schurs->sum_S_Ej_all) { 6190 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6191 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6192 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6193 if (pcbddc->deluxe_zerorows) { 6194 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6195 } 6196 sub_schurs->sum_S_Ej_all = S_new; 6197 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6198 } 6199 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6200 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6201 } 6202 /* destroy any change of basis context in sub_schurs */ 6203 if (sub_schurs && sub_schurs->change) { 6204 PetscInt i; 6205 6206 for (i=0;i<sub_schurs->n_subs;i++) { 6207 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6208 } 6209 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6210 } 6211 } 6212 if (pcbddc->switch_static) { /* need to save the local change */ 6213 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6214 } else { 6215 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6216 } 6217 /* determine if any process has changed the pressures locally */ 6218 pcbddc->change_interior = pcbddc->benign_have_null; 6219 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6220 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6221 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6222 pcbddc->use_qr_single = qr_needed; 6223 } 6224 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6225 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6226 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6227 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6228 } else { 6229 Mat benign_global = NULL; 6230 if (pcbddc->benign_have_null) { 6231 Mat tmat; 6232 6233 pcbddc->change_interior = PETSC_TRUE; 6234 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6235 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6236 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6237 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6238 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6239 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6240 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6241 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6242 if (pcbddc->benign_change) { 6243 Mat M; 6244 6245 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6246 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6247 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6248 ierr = MatDestroy(&M);CHKERRQ(ierr); 6249 } else { 6250 Mat eye; 6251 PetscScalar *array; 6252 6253 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6254 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6255 for (i=0;i<pcis->n;i++) { 6256 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6257 } 6258 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6259 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6260 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6261 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6262 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6263 } 6264 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6265 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6266 } 6267 if (pcbddc->user_ChangeOfBasisMatrix) { 6268 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6269 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6270 } else if (pcbddc->benign_have_null) { 6271 pcbddc->ChangeOfBasisMatrix = benign_global; 6272 } 6273 } 6274 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6275 IS is_global; 6276 const PetscInt *gidxs; 6277 6278 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6279 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6280 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6281 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6282 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6283 } 6284 } 6285 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6286 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6287 } 6288 6289 if (!pcbddc->fake_change) { 6290 /* add pressure dofs to set of primal nodes for numbering purposes */ 6291 for (i=0;i<pcbddc->benign_n;i++) { 6292 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6293 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6294 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6295 pcbddc->local_primal_size_cc++; 6296 pcbddc->local_primal_size++; 6297 } 6298 6299 /* check if a new primal space has been introduced (also take into account benign trick) */ 6300 pcbddc->new_primal_space_local = PETSC_TRUE; 6301 if (olocal_primal_size == pcbddc->local_primal_size) { 6302 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6303 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6304 if (!pcbddc->new_primal_space_local) { 6305 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6306 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6307 } 6308 } 6309 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6310 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6311 } 6312 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6313 6314 /* flush dbg viewer */ 6315 if (pcbddc->dbg_flag) { 6316 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6317 } 6318 6319 /* free workspace */ 6320 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6321 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6322 if (!pcbddc->adaptive_selection) { 6323 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6324 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6325 } else { 6326 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6327 pcbddc->adaptive_constraints_idxs_ptr, 6328 pcbddc->adaptive_constraints_data_ptr, 6329 pcbddc->adaptive_constraints_idxs, 6330 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6331 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6332 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6333 } 6334 PetscFunctionReturn(0); 6335 } 6336 6337 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6338 { 6339 ISLocalToGlobalMapping map; 6340 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6341 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6342 PetscInt i,N; 6343 PetscBool rcsr = PETSC_FALSE; 6344 PetscErrorCode ierr; 6345 6346 PetscFunctionBegin; 6347 if (pcbddc->recompute_topography) { 6348 pcbddc->graphanalyzed = PETSC_FALSE; 6349 /* Reset previously computed graph */ 6350 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6351 /* Init local Graph struct */ 6352 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6353 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6354 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6355 6356 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6357 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6358 } 6359 /* Check validity of the csr graph passed in by the user */ 6360 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6361 6362 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6363 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6364 PetscInt *xadj,*adjncy; 6365 PetscInt nvtxs; 6366 PetscBool flg_row=PETSC_FALSE; 6367 6368 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6369 if (flg_row) { 6370 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6371 pcbddc->computed_rowadj = PETSC_TRUE; 6372 } 6373 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6374 rcsr = PETSC_TRUE; 6375 } 6376 if (pcbddc->dbg_flag) { 6377 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6378 } 6379 6380 /* Setup of Graph */ 6381 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6382 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6383 6384 /* attach info on disconnected subdomains if present */ 6385 if (pcbddc->n_local_subs) { 6386 PetscInt *local_subs; 6387 6388 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6389 for (i=0;i<pcbddc->n_local_subs;i++) { 6390 const PetscInt *idxs; 6391 PetscInt nl,j; 6392 6393 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6394 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6395 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6396 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6397 } 6398 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6399 pcbddc->mat_graph->local_subs = local_subs; 6400 } 6401 } 6402 6403 if (!pcbddc->graphanalyzed) { 6404 /* Graph's connected components analysis */ 6405 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6406 pcbddc->graphanalyzed = PETSC_TRUE; 6407 } 6408 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6409 PetscFunctionReturn(0); 6410 } 6411 6412 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6413 { 6414 PetscInt i,j; 6415 PetscScalar *alphas; 6416 PetscErrorCode ierr; 6417 6418 PetscFunctionBegin; 6419 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6420 for (i=0;i<n;i++) { 6421 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6422 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6423 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6424 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6425 } 6426 ierr = PetscFree(alphas);CHKERRQ(ierr); 6427 PetscFunctionReturn(0); 6428 } 6429 6430 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6431 { 6432 Mat A; 6433 PetscInt n_neighs,*neighs,*n_shared,**shared; 6434 PetscMPIInt size,rank,color; 6435 PetscInt *xadj,*adjncy; 6436 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6437 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 6438 PetscInt void_procs,*procs_candidates = NULL; 6439 PetscInt xadj_count,*count; 6440 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6441 PetscSubcomm psubcomm; 6442 MPI_Comm subcomm; 6443 PetscErrorCode ierr; 6444 6445 PetscFunctionBegin; 6446 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6447 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6448 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 6449 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6450 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6451 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6452 6453 if (have_void) *have_void = PETSC_FALSE; 6454 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6455 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6456 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6457 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6458 im_active = !!n; 6459 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6460 void_procs = size - active_procs; 6461 /* get ranks of of non-active processes in mat communicator */ 6462 if (void_procs) { 6463 PetscInt ncand; 6464 6465 if (have_void) *have_void = PETSC_TRUE; 6466 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6467 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6468 for (i=0,ncand=0;i<size;i++) { 6469 if (!procs_candidates[i]) { 6470 procs_candidates[ncand++] = i; 6471 } 6472 } 6473 /* force n_subdomains to be not greater that the number of non-active processes */ 6474 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6475 } 6476 6477 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 6478 number of subdomains requested 1 -> send to master or first candidate in voids */ 6479 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 6480 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 6481 PetscInt issize,isidx,dest; 6482 if (*n_subdomains == 1) dest = 0; 6483 else dest = rank; 6484 if (im_active) { 6485 issize = 1; 6486 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6487 isidx = procs_candidates[dest]; 6488 } else { 6489 isidx = dest; 6490 } 6491 } else { 6492 issize = 0; 6493 isidx = -1; 6494 } 6495 if (*n_subdomains != 1) *n_subdomains = active_procs; 6496 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6497 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6498 PetscFunctionReturn(0); 6499 } 6500 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6501 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6502 threshold = PetscMax(threshold,2); 6503 6504 /* Get info on mapping */ 6505 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6506 6507 /* build local CSR graph of subdomains' connectivity */ 6508 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6509 xadj[0] = 0; 6510 xadj[1] = PetscMax(n_neighs-1,0); 6511 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6512 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6513 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 6514 for (i=1;i<n_neighs;i++) 6515 for (j=0;j<n_shared[i];j++) 6516 count[shared[i][j]] += 1; 6517 6518 xadj_count = 0; 6519 for (i=1;i<n_neighs;i++) { 6520 for (j=0;j<n_shared[i];j++) { 6521 if (count[shared[i][j]] < threshold) { 6522 adjncy[xadj_count] = neighs[i]; 6523 adjncy_wgt[xadj_count] = n_shared[i]; 6524 xadj_count++; 6525 break; 6526 } 6527 } 6528 } 6529 xadj[1] = xadj_count; 6530 ierr = PetscFree(count);CHKERRQ(ierr); 6531 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6532 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6533 6534 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6535 6536 /* Restrict work on active processes only */ 6537 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6538 if (void_procs) { 6539 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6540 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6541 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6542 subcomm = PetscSubcommChild(psubcomm); 6543 } else { 6544 psubcomm = NULL; 6545 subcomm = PetscObjectComm((PetscObject)mat); 6546 } 6547 6548 v_wgt = NULL; 6549 if (!color) { 6550 ierr = PetscFree(xadj);CHKERRQ(ierr); 6551 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6552 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6553 } else { 6554 Mat subdomain_adj; 6555 IS new_ranks,new_ranks_contig; 6556 MatPartitioning partitioner; 6557 PetscInt rstart=0,rend=0; 6558 PetscInt *is_indices,*oldranks; 6559 PetscMPIInt size; 6560 PetscBool aggregate; 6561 6562 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6563 if (void_procs) { 6564 PetscInt prank = rank; 6565 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6566 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6567 for (i=0;i<xadj[1];i++) { 6568 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6569 } 6570 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6571 } else { 6572 oldranks = NULL; 6573 } 6574 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6575 if (aggregate) { /* TODO: all this part could be made more efficient */ 6576 PetscInt lrows,row,ncols,*cols; 6577 PetscMPIInt nrank; 6578 PetscScalar *vals; 6579 6580 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6581 lrows = 0; 6582 if (nrank<redprocs) { 6583 lrows = size/redprocs; 6584 if (nrank<size%redprocs) lrows++; 6585 } 6586 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6587 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6588 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6589 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6590 row = nrank; 6591 ncols = xadj[1]-xadj[0]; 6592 cols = adjncy; 6593 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6594 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6595 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6596 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6597 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6598 ierr = PetscFree(xadj);CHKERRQ(ierr); 6599 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6600 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6601 ierr = PetscFree(vals);CHKERRQ(ierr); 6602 if (use_vwgt) { 6603 Vec v; 6604 const PetscScalar *array; 6605 PetscInt nl; 6606 6607 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6608 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 6609 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6610 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6611 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6612 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6613 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6614 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6615 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6616 ierr = VecDestroy(&v);CHKERRQ(ierr); 6617 } 6618 } else { 6619 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6620 if (use_vwgt) { 6621 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6622 v_wgt[0] = n; 6623 } 6624 } 6625 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6626 6627 /* Partition */ 6628 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6629 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6630 if (v_wgt) { 6631 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6632 } 6633 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6634 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6635 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6636 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6637 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6638 6639 /* renumber new_ranks to avoid "holes" in new set of processors */ 6640 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6641 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6642 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6643 if (!aggregate) { 6644 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6645 #if defined(PETSC_USE_DEBUG) 6646 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6647 #endif 6648 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6649 } else if (oldranks) { 6650 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6651 } else { 6652 ranks_send_to_idx[0] = is_indices[0]; 6653 } 6654 } else { 6655 PetscInt idxs[1]; 6656 PetscMPIInt tag; 6657 MPI_Request *reqs; 6658 6659 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6660 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6661 for (i=rstart;i<rend;i++) { 6662 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6663 } 6664 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6665 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6666 ierr = PetscFree(reqs);CHKERRQ(ierr); 6667 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6668 #if defined(PETSC_USE_DEBUG) 6669 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6670 #endif 6671 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6672 } else if (oldranks) { 6673 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6674 } else { 6675 ranks_send_to_idx[0] = idxs[0]; 6676 } 6677 } 6678 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6679 /* clean up */ 6680 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6681 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6682 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6683 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6684 } 6685 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6686 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6687 6688 /* assemble parallel IS for sends */ 6689 i = 1; 6690 if (!color) i=0; 6691 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6692 PetscFunctionReturn(0); 6693 } 6694 6695 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6696 6697 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6698 { 6699 Mat local_mat; 6700 IS is_sends_internal; 6701 PetscInt rows,cols,new_local_rows; 6702 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6703 PetscBool ismatis,isdense,newisdense,destroy_mat; 6704 ISLocalToGlobalMapping l2gmap; 6705 PetscInt* l2gmap_indices; 6706 const PetscInt* is_indices; 6707 MatType new_local_type; 6708 /* buffers */ 6709 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6710 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6711 PetscInt *recv_buffer_idxs_local; 6712 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6713 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6714 /* MPI */ 6715 MPI_Comm comm,comm_n; 6716 PetscSubcomm subcomm; 6717 PetscMPIInt n_sends,n_recvs,commsize; 6718 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6719 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6720 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6721 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6722 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6723 PetscErrorCode ierr; 6724 6725 PetscFunctionBegin; 6726 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6727 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6728 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 6729 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6730 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6731 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6732 PetscValidLogicalCollectiveBool(mat,reuse,6); 6733 PetscValidLogicalCollectiveInt(mat,nis,8); 6734 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6735 if (nvecs) { 6736 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6737 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6738 } 6739 /* further checks */ 6740 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6741 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6742 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6743 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6744 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6745 if (reuse && *mat_n) { 6746 PetscInt mrows,mcols,mnrows,mncols; 6747 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6748 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6749 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6750 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6751 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6752 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6753 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6754 } 6755 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6756 PetscValidLogicalCollectiveInt(mat,bs,0); 6757 6758 /* prepare IS for sending if not provided */ 6759 if (!is_sends) { 6760 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6761 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6762 } else { 6763 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6764 is_sends_internal = is_sends; 6765 } 6766 6767 /* get comm */ 6768 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6769 6770 /* compute number of sends */ 6771 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6772 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6773 6774 /* compute number of receives */ 6775 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6776 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6777 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6778 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6779 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6780 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6781 ierr = PetscFree(iflags);CHKERRQ(ierr); 6782 6783 /* restrict comm if requested */ 6784 subcomm = 0; 6785 destroy_mat = PETSC_FALSE; 6786 if (restrict_comm) { 6787 PetscMPIInt color,subcommsize; 6788 6789 color = 0; 6790 if (restrict_full) { 6791 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6792 } else { 6793 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6794 } 6795 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6796 subcommsize = commsize - subcommsize; 6797 /* check if reuse has been requested */ 6798 if (reuse) { 6799 if (*mat_n) { 6800 PetscMPIInt subcommsize2; 6801 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6802 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6803 comm_n = PetscObjectComm((PetscObject)*mat_n); 6804 } else { 6805 comm_n = PETSC_COMM_SELF; 6806 } 6807 } else { /* MAT_INITIAL_MATRIX */ 6808 PetscMPIInt rank; 6809 6810 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6811 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6812 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6813 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6814 comm_n = PetscSubcommChild(subcomm); 6815 } 6816 /* flag to destroy *mat_n if not significative */ 6817 if (color) destroy_mat = PETSC_TRUE; 6818 } else { 6819 comm_n = comm; 6820 } 6821 6822 /* prepare send/receive buffers */ 6823 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6824 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6825 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6826 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6827 if (nis) { 6828 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6829 } 6830 6831 /* Get data from local matrices */ 6832 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6833 /* TODO: See below some guidelines on how to prepare the local buffers */ 6834 /* 6835 send_buffer_vals should contain the raw values of the local matrix 6836 send_buffer_idxs should contain: 6837 - MatType_PRIVATE type 6838 - PetscInt size_of_l2gmap 6839 - PetscInt global_row_indices[size_of_l2gmap] 6840 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6841 */ 6842 else { 6843 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6844 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6845 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6846 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6847 send_buffer_idxs[1] = i; 6848 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6849 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6850 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6851 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6852 for (i=0;i<n_sends;i++) { 6853 ilengths_vals[is_indices[i]] = len*len; 6854 ilengths_idxs[is_indices[i]] = len+2; 6855 } 6856 } 6857 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6858 /* additional is (if any) */ 6859 if (nis) { 6860 PetscMPIInt psum; 6861 PetscInt j; 6862 for (j=0,psum=0;j<nis;j++) { 6863 PetscInt plen; 6864 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6865 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6866 psum += len+1; /* indices + lenght */ 6867 } 6868 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6869 for (j=0,psum=0;j<nis;j++) { 6870 PetscInt plen; 6871 const PetscInt *is_array_idxs; 6872 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6873 send_buffer_idxs_is[psum] = plen; 6874 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6875 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6876 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6877 psum += plen+1; /* indices + lenght */ 6878 } 6879 for (i=0;i<n_sends;i++) { 6880 ilengths_idxs_is[is_indices[i]] = psum; 6881 } 6882 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6883 } 6884 6885 buf_size_idxs = 0; 6886 buf_size_vals = 0; 6887 buf_size_idxs_is = 0; 6888 buf_size_vecs = 0; 6889 for (i=0;i<n_recvs;i++) { 6890 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6891 buf_size_vals += (PetscInt)olengths_vals[i]; 6892 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6893 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6894 } 6895 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6896 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6897 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6898 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6899 6900 /* get new tags for clean communications */ 6901 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6902 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6903 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6904 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6905 6906 /* allocate for requests */ 6907 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6908 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6909 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6910 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6911 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6912 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6913 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6914 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6915 6916 /* communications */ 6917 ptr_idxs = recv_buffer_idxs; 6918 ptr_vals = recv_buffer_vals; 6919 ptr_idxs_is = recv_buffer_idxs_is; 6920 ptr_vecs = recv_buffer_vecs; 6921 for (i=0;i<n_recvs;i++) { 6922 source_dest = onodes[i]; 6923 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6924 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6925 ptr_idxs += olengths_idxs[i]; 6926 ptr_vals += olengths_vals[i]; 6927 if (nis) { 6928 source_dest = onodes_is[i]; 6929 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6930 ptr_idxs_is += olengths_idxs_is[i]; 6931 } 6932 if (nvecs) { 6933 source_dest = onodes[i]; 6934 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6935 ptr_vecs += olengths_idxs[i]-2; 6936 } 6937 } 6938 for (i=0;i<n_sends;i++) { 6939 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6940 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6941 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6942 if (nis) { 6943 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6944 } 6945 if (nvecs) { 6946 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6947 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6948 } 6949 } 6950 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6951 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6952 6953 /* assemble new l2g map */ 6954 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6955 ptr_idxs = recv_buffer_idxs; 6956 new_local_rows = 0; 6957 for (i=0;i<n_recvs;i++) { 6958 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6959 ptr_idxs += olengths_idxs[i]; 6960 } 6961 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6962 ptr_idxs = recv_buffer_idxs; 6963 new_local_rows = 0; 6964 for (i=0;i<n_recvs;i++) { 6965 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6966 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6967 ptr_idxs += olengths_idxs[i]; 6968 } 6969 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 6970 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 6971 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 6972 6973 /* infer new local matrix type from received local matrices type */ 6974 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 6975 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 6976 if (n_recvs) { 6977 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 6978 ptr_idxs = recv_buffer_idxs; 6979 for (i=0;i<n_recvs;i++) { 6980 if ((PetscInt)new_local_type_private != *ptr_idxs) { 6981 new_local_type_private = MATAIJ_PRIVATE; 6982 break; 6983 } 6984 ptr_idxs += olengths_idxs[i]; 6985 } 6986 switch (new_local_type_private) { 6987 case MATDENSE_PRIVATE: 6988 new_local_type = MATSEQAIJ; 6989 bs = 1; 6990 break; 6991 case MATAIJ_PRIVATE: 6992 new_local_type = MATSEQAIJ; 6993 bs = 1; 6994 break; 6995 case MATBAIJ_PRIVATE: 6996 new_local_type = MATSEQBAIJ; 6997 break; 6998 case MATSBAIJ_PRIVATE: 6999 new_local_type = MATSEQSBAIJ; 7000 break; 7001 default: 7002 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7003 break; 7004 } 7005 } else { /* by default, new_local_type is seqaij */ 7006 new_local_type = MATSEQAIJ; 7007 bs = 1; 7008 } 7009 7010 /* create MATIS object if needed */ 7011 if (!reuse) { 7012 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7013 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7014 } else { 7015 /* it also destroys the local matrices */ 7016 if (*mat_n) { 7017 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7018 } else { /* this is a fake object */ 7019 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7020 } 7021 } 7022 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7023 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7024 7025 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7026 7027 /* Global to local map of received indices */ 7028 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7029 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7030 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7031 7032 /* restore attributes -> type of incoming data and its size */ 7033 buf_size_idxs = 0; 7034 for (i=0;i<n_recvs;i++) { 7035 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7036 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7037 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7038 } 7039 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7040 7041 /* set preallocation */ 7042 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7043 if (!newisdense) { 7044 PetscInt *new_local_nnz=0; 7045 7046 ptr_idxs = recv_buffer_idxs_local; 7047 if (n_recvs) { 7048 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7049 } 7050 for (i=0;i<n_recvs;i++) { 7051 PetscInt j; 7052 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7053 for (j=0;j<*(ptr_idxs+1);j++) { 7054 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7055 } 7056 } else { 7057 /* TODO */ 7058 } 7059 ptr_idxs += olengths_idxs[i]; 7060 } 7061 if (new_local_nnz) { 7062 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7063 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7064 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7065 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7066 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7067 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7068 } else { 7069 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7070 } 7071 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7072 } else { 7073 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7074 } 7075 7076 /* set values */ 7077 ptr_vals = recv_buffer_vals; 7078 ptr_idxs = recv_buffer_idxs_local; 7079 for (i=0;i<n_recvs;i++) { 7080 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7081 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7082 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7083 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7084 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7085 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7086 } else { 7087 /* TODO */ 7088 } 7089 ptr_idxs += olengths_idxs[i]; 7090 ptr_vals += olengths_vals[i]; 7091 } 7092 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7093 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7094 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7095 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7096 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7097 7098 #if 0 7099 if (!restrict_comm) { /* check */ 7100 Vec lvec,rvec; 7101 PetscReal infty_error; 7102 7103 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7104 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7105 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7106 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7107 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7108 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7109 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7110 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7111 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7112 } 7113 #endif 7114 7115 /* assemble new additional is (if any) */ 7116 if (nis) { 7117 PetscInt **temp_idxs,*count_is,j,psum; 7118 7119 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7120 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7121 ptr_idxs = recv_buffer_idxs_is; 7122 psum = 0; 7123 for (i=0;i<n_recvs;i++) { 7124 for (j=0;j<nis;j++) { 7125 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7126 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7127 psum += plen; 7128 ptr_idxs += plen+1; /* shift pointer to received data */ 7129 } 7130 } 7131 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7132 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7133 for (i=1;i<nis;i++) { 7134 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7135 } 7136 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7137 ptr_idxs = recv_buffer_idxs_is; 7138 for (i=0;i<n_recvs;i++) { 7139 for (j=0;j<nis;j++) { 7140 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7141 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7142 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7143 ptr_idxs += plen+1; /* shift pointer to received data */ 7144 } 7145 } 7146 for (i=0;i<nis;i++) { 7147 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7148 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7149 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7150 } 7151 ierr = PetscFree(count_is);CHKERRQ(ierr); 7152 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7153 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7154 } 7155 /* free workspace */ 7156 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7157 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7158 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7159 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7160 if (isdense) { 7161 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7162 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7163 } else { 7164 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7165 } 7166 if (nis) { 7167 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7168 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7169 } 7170 7171 if (nvecs) { 7172 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7173 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7174 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7175 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7176 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7177 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7178 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7179 /* set values */ 7180 ptr_vals = recv_buffer_vecs; 7181 ptr_idxs = recv_buffer_idxs_local; 7182 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7183 for (i=0;i<n_recvs;i++) { 7184 PetscInt j; 7185 for (j=0;j<*(ptr_idxs+1);j++) { 7186 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7187 } 7188 ptr_idxs += olengths_idxs[i]; 7189 ptr_vals += olengths_idxs[i]-2; 7190 } 7191 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7192 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7193 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7194 } 7195 7196 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7197 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7198 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7199 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7200 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7201 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7202 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7203 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7204 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7205 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7206 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7207 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7208 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7209 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7210 ierr = PetscFree(onodes);CHKERRQ(ierr); 7211 if (nis) { 7212 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7213 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7214 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7215 } 7216 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7217 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7218 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7219 for (i=0;i<nis;i++) { 7220 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7221 } 7222 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7223 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7224 } 7225 *mat_n = NULL; 7226 } 7227 PetscFunctionReturn(0); 7228 } 7229 7230 /* temporary hack into ksp private data structure */ 7231 #include <petsc/private/kspimpl.h> 7232 7233 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7234 { 7235 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7236 PC_IS *pcis = (PC_IS*)pc->data; 7237 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7238 Mat coarsedivudotp = NULL; 7239 Mat coarseG,t_coarse_mat_is; 7240 MatNullSpace CoarseNullSpace = NULL; 7241 ISLocalToGlobalMapping coarse_islg; 7242 IS coarse_is,*isarray; 7243 PetscInt i,im_active=-1,active_procs=-1; 7244 PetscInt nis,nisdofs,nisneu,nisvert; 7245 PC pc_temp; 7246 PCType coarse_pc_type; 7247 KSPType coarse_ksp_type; 7248 PetscBool multilevel_requested,multilevel_allowed; 7249 PetscBool isredundant,isbddc,isnn,coarse_reuse; 7250 PetscInt ncoarse,nedcfield; 7251 PetscBool compute_vecs = PETSC_FALSE; 7252 PetscScalar *array; 7253 MatReuse coarse_mat_reuse; 7254 PetscBool restr, full_restr, have_void; 7255 PetscMPIInt commsize; 7256 PetscErrorCode ierr; 7257 7258 PetscFunctionBegin; 7259 /* Assign global numbering to coarse dofs */ 7260 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7261 PetscInt ocoarse_size; 7262 compute_vecs = PETSC_TRUE; 7263 7264 pcbddc->new_primal_space = PETSC_TRUE; 7265 ocoarse_size = pcbddc->coarse_size; 7266 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7267 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7268 /* see if we can avoid some work */ 7269 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7270 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7271 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7272 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7273 coarse_reuse = PETSC_FALSE; 7274 } else { /* we can safely reuse already computed coarse matrix */ 7275 coarse_reuse = PETSC_TRUE; 7276 } 7277 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7278 coarse_reuse = PETSC_FALSE; 7279 } 7280 /* reset any subassembling information */ 7281 if (!coarse_reuse || pcbddc->recompute_topography) { 7282 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7283 } 7284 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7285 coarse_reuse = PETSC_TRUE; 7286 } 7287 /* assemble coarse matrix */ 7288 if (coarse_reuse && pcbddc->coarse_ksp) { 7289 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7290 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7291 coarse_mat_reuse = MAT_REUSE_MATRIX; 7292 } else { 7293 coarse_mat = NULL; 7294 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7295 } 7296 7297 /* creates temporary l2gmap and IS for coarse indexes */ 7298 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7299 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7300 7301 /* creates temporary MATIS object for coarse matrix */ 7302 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7303 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7304 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7305 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7306 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7307 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7308 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7309 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7310 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7311 7312 /* count "active" (i.e. with positive local size) and "void" processes */ 7313 im_active = !!(pcis->n); 7314 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7315 7316 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7317 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7318 /* full_restr : just use the receivers from the subassembling pattern */ 7319 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);CHKERRQ(ierr); 7320 coarse_mat_is = NULL; 7321 multilevel_allowed = PETSC_FALSE; 7322 multilevel_requested = PETSC_FALSE; 7323 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7324 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7325 if (multilevel_requested) { 7326 ncoarse = active_procs/pcbddc->coarsening_ratio; 7327 restr = PETSC_FALSE; 7328 full_restr = PETSC_FALSE; 7329 } else { 7330 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7331 restr = PETSC_TRUE; 7332 full_restr = PETSC_TRUE; 7333 } 7334 if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7335 ncoarse = PetscMax(1,ncoarse); 7336 if (!pcbddc->coarse_subassembling) { 7337 if (pcbddc->coarsening_ratio > 1) { 7338 if (multilevel_requested) { 7339 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7340 } else { 7341 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7342 } 7343 } else { 7344 PetscMPIInt rank; 7345 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7346 have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE; 7347 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7348 } 7349 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7350 PetscInt psum; 7351 if (pcbddc->coarse_ksp) psum = 1; 7352 else psum = 0; 7353 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7354 if (ncoarse < commsize) have_void = PETSC_TRUE; 7355 } 7356 /* determine if we can go multilevel */ 7357 if (multilevel_requested) { 7358 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7359 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7360 } 7361 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7362 7363 /* dump subassembling pattern */ 7364 if (pcbddc->dbg_flag && multilevel_allowed) { 7365 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7366 } 7367 7368 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7369 nedcfield = -1; 7370 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7371 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7372 const PetscInt *idxs; 7373 ISLocalToGlobalMapping tmap; 7374 7375 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7376 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7377 /* allocate space for temporary storage */ 7378 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7379 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7380 /* allocate for IS array */ 7381 nisdofs = pcbddc->n_ISForDofsLocal; 7382 if (pcbddc->nedclocal) { 7383 if (pcbddc->nedfield > -1) { 7384 nedcfield = pcbddc->nedfield; 7385 } else { 7386 nedcfield = 0; 7387 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7388 nisdofs = 1; 7389 } 7390 } 7391 nisneu = !!pcbddc->NeumannBoundariesLocal; 7392 nisvert = 0; /* nisvert is not used */ 7393 nis = nisdofs + nisneu + nisvert; 7394 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7395 /* dofs splitting */ 7396 for (i=0;i<nisdofs;i++) { 7397 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7398 if (nedcfield != i) { 7399 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7400 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7401 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7402 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7403 } else { 7404 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7405 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7406 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7407 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7408 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7409 } 7410 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7411 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7412 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7413 } 7414 /* neumann boundaries */ 7415 if (pcbddc->NeumannBoundariesLocal) { 7416 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7417 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7418 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7419 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7420 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7421 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7422 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7423 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7424 } 7425 /* free memory */ 7426 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7427 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7428 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7429 } else { 7430 nis = 0; 7431 nisdofs = 0; 7432 nisneu = 0; 7433 nisvert = 0; 7434 isarray = NULL; 7435 } 7436 /* destroy no longer needed map */ 7437 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7438 7439 /* subassemble */ 7440 if (multilevel_allowed) { 7441 Vec vp[1]; 7442 PetscInt nvecs = 0; 7443 PetscBool reuse,reuser; 7444 7445 if (coarse_mat) reuse = PETSC_TRUE; 7446 else reuse = PETSC_FALSE; 7447 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7448 vp[0] = NULL; 7449 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7450 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7451 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7452 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7453 nvecs = 1; 7454 7455 if (pcbddc->divudotp) { 7456 Mat B,loc_divudotp; 7457 Vec v,p; 7458 IS dummy; 7459 PetscInt np; 7460 7461 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7462 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7463 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7464 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7465 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7466 ierr = VecSet(p,1.);CHKERRQ(ierr); 7467 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7468 ierr = VecDestroy(&p);CHKERRQ(ierr); 7469 ierr = MatDestroy(&B);CHKERRQ(ierr); 7470 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7471 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7472 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7473 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7474 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7475 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7476 ierr = VecDestroy(&v);CHKERRQ(ierr); 7477 } 7478 } 7479 if (reuser) { 7480 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7481 } else { 7482 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7483 } 7484 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7485 PetscScalar *arraym,*arrayv; 7486 PetscInt nl; 7487 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7488 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7489 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7490 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7491 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7492 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7493 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7494 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7495 } else { 7496 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7497 } 7498 } else { 7499 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7500 } 7501 if (coarse_mat_is || coarse_mat) { 7502 PetscMPIInt size; 7503 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7504 if (!multilevel_allowed) { 7505 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7506 } else { 7507 Mat A; 7508 7509 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7510 if (coarse_mat_is) { 7511 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7512 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7513 coarse_mat = coarse_mat_is; 7514 } 7515 /* be sure we don't have MatSeqDENSE as local mat */ 7516 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7517 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7518 } 7519 } 7520 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7521 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7522 7523 /* create local to global scatters for coarse problem */ 7524 if (compute_vecs) { 7525 PetscInt lrows; 7526 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7527 if (coarse_mat) { 7528 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7529 } else { 7530 lrows = 0; 7531 } 7532 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7533 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7534 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7535 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7536 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7537 } 7538 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7539 7540 /* set defaults for coarse KSP and PC */ 7541 if (multilevel_allowed) { 7542 coarse_ksp_type = KSPRICHARDSON; 7543 coarse_pc_type = PCBDDC; 7544 } else { 7545 coarse_ksp_type = KSPPREONLY; 7546 coarse_pc_type = PCREDUNDANT; 7547 } 7548 7549 /* print some info if requested */ 7550 if (pcbddc->dbg_flag) { 7551 if (!multilevel_allowed) { 7552 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7553 if (multilevel_requested) { 7554 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7555 } else if (pcbddc->max_levels) { 7556 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7557 } 7558 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7559 } 7560 } 7561 7562 /* communicate coarse discrete gradient */ 7563 coarseG = NULL; 7564 if (pcbddc->nedcG && multilevel_allowed) { 7565 MPI_Comm ccomm; 7566 if (coarse_mat) { 7567 ccomm = PetscObjectComm((PetscObject)coarse_mat); 7568 } else { 7569 ccomm = MPI_COMM_NULL; 7570 } 7571 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 7572 } 7573 7574 /* create the coarse KSP object only once with defaults */ 7575 if (coarse_mat) { 7576 PetscViewer dbg_viewer = NULL; 7577 if (pcbddc->dbg_flag) { 7578 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7579 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7580 } 7581 if (!pcbddc->coarse_ksp) { 7582 char prefix[256],str_level[16]; 7583 size_t len; 7584 7585 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7586 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7587 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7588 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7589 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7590 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7591 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7592 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7593 /* TODO is this logic correct? should check for coarse_mat type */ 7594 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7595 /* prefix */ 7596 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7597 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7598 if (!pcbddc->current_level) { 7599 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7600 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7601 } else { 7602 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7603 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7604 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7605 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7606 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7607 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7608 } 7609 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7610 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7611 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7612 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7613 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7614 /* allow user customization */ 7615 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7616 } 7617 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7618 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7619 if (nisdofs) { 7620 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7621 for (i=0;i<nisdofs;i++) { 7622 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7623 } 7624 } 7625 if (nisneu) { 7626 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7627 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7628 } 7629 if (nisvert) { 7630 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7631 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7632 } 7633 if (coarseG) { 7634 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 7635 } 7636 7637 /* get some info after set from options */ 7638 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7639 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7640 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7641 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 7642 if (isbddc && !multilevel_allowed) { 7643 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7644 isbddc = PETSC_FALSE; 7645 } 7646 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 7647 if (multilevel_requested && !isbddc && !isnn) { 7648 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 7649 isbddc = PETSC_TRUE; 7650 isnn = PETSC_FALSE; 7651 } 7652 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7653 if (isredundant) { 7654 KSP inner_ksp; 7655 PC inner_pc; 7656 7657 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7658 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7659 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7660 } 7661 7662 /* parameters which miss an API */ 7663 if (isbddc) { 7664 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7665 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7666 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7667 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7668 if (pcbddc_coarse->benign_saddle_point) { 7669 Mat coarsedivudotp_is; 7670 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7671 IS row,col; 7672 const PetscInt *gidxs; 7673 PetscInt n,st,M,N; 7674 7675 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7676 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7677 st = st-n; 7678 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7679 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7680 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7681 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7682 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7683 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7684 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7685 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7686 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7687 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7688 ierr = ISDestroy(&row);CHKERRQ(ierr); 7689 ierr = ISDestroy(&col);CHKERRQ(ierr); 7690 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7691 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7692 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7693 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7694 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7695 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7696 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7697 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7698 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7699 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7700 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7701 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7702 } 7703 } 7704 7705 /* propagate symmetry info of coarse matrix */ 7706 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7707 if (pc->pmat->symmetric_set) { 7708 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7709 } 7710 if (pc->pmat->hermitian_set) { 7711 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7712 } 7713 if (pc->pmat->spd_set) { 7714 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7715 } 7716 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7717 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7718 } 7719 /* set operators */ 7720 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7721 if (pcbddc->dbg_flag) { 7722 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7723 } 7724 } 7725 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 7726 ierr = PetscFree(isarray);CHKERRQ(ierr); 7727 #if 0 7728 { 7729 PetscViewer viewer; 7730 char filename[256]; 7731 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7732 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7733 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7734 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7735 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7736 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7737 } 7738 #endif 7739 7740 if (pcbddc->coarse_ksp) { 7741 Vec crhs,csol; 7742 7743 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7744 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7745 if (!csol) { 7746 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7747 } 7748 if (!crhs) { 7749 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7750 } 7751 } 7752 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7753 7754 /* compute null space for coarse solver if the benign trick has been requested */ 7755 if (pcbddc->benign_null) { 7756 7757 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7758 for (i=0;i<pcbddc->benign_n;i++) { 7759 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7760 } 7761 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7762 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7763 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7764 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7765 if (coarse_mat) { 7766 Vec nullv; 7767 PetscScalar *array,*array2; 7768 PetscInt nl; 7769 7770 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7771 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7772 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7773 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7774 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7775 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7776 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7777 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7778 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7779 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7780 } 7781 } 7782 7783 if (pcbddc->coarse_ksp) { 7784 PetscBool ispreonly; 7785 7786 if (CoarseNullSpace) { 7787 PetscBool isnull; 7788 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7789 if (isnull) { 7790 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7791 } 7792 /* TODO: add local nullspaces (if any) */ 7793 } 7794 /* setup coarse ksp */ 7795 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7796 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7797 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7798 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7799 KSP check_ksp; 7800 KSPType check_ksp_type; 7801 PC check_pc; 7802 Vec check_vec,coarse_vec; 7803 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7804 PetscInt its; 7805 PetscBool compute_eigs; 7806 PetscReal *eigs_r,*eigs_c; 7807 PetscInt neigs; 7808 const char *prefix; 7809 7810 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7811 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7812 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7813 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7814 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7815 /* prevent from setup unneeded object */ 7816 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7817 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7818 if (ispreonly) { 7819 check_ksp_type = KSPPREONLY; 7820 compute_eigs = PETSC_FALSE; 7821 } else { 7822 check_ksp_type = KSPGMRES; 7823 compute_eigs = PETSC_TRUE; 7824 } 7825 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7826 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7827 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7828 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7829 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7830 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7831 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7832 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7833 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7834 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7835 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7836 /* create random vec */ 7837 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7838 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7839 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7840 /* solve coarse problem */ 7841 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7842 /* set eigenvalue estimation if preonly has not been requested */ 7843 if (compute_eigs) { 7844 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7845 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7846 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7847 if (neigs) { 7848 lambda_max = eigs_r[neigs-1]; 7849 lambda_min = eigs_r[0]; 7850 if (pcbddc->use_coarse_estimates) { 7851 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7852 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7853 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7854 } 7855 } 7856 } 7857 } 7858 7859 /* check coarse problem residual error */ 7860 if (pcbddc->dbg_flag) { 7861 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7862 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7863 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7864 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7865 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7866 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7867 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7868 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7869 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7870 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7871 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7872 if (CoarseNullSpace) { 7873 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7874 } 7875 if (compute_eigs) { 7876 PetscReal lambda_max_s,lambda_min_s; 7877 KSPConvergedReason reason; 7878 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7879 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7880 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 7881 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7882 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7883 for (i=0;i<neigs;i++) { 7884 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7885 } 7886 } 7887 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7888 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7889 } 7890 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7891 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7892 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7893 if (compute_eigs) { 7894 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7895 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7896 } 7897 } 7898 } 7899 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7900 /* print additional info */ 7901 if (pcbddc->dbg_flag) { 7902 /* waits until all processes reaches this point */ 7903 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7904 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7905 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7906 } 7907 7908 /* free memory */ 7909 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7910 PetscFunctionReturn(0); 7911 } 7912 7913 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7914 { 7915 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7916 PC_IS* pcis = (PC_IS*)pc->data; 7917 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7918 IS subset,subset_mult,subset_n; 7919 PetscInt local_size,coarse_size=0; 7920 PetscInt *local_primal_indices=NULL; 7921 const PetscInt *t_local_primal_indices; 7922 PetscErrorCode ierr; 7923 7924 PetscFunctionBegin; 7925 /* Compute global number of coarse dofs */ 7926 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7927 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7928 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7929 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7930 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7931 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7932 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7933 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7934 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7935 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7936 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7937 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7938 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7939 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7940 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7941 7942 /* check numbering */ 7943 if (pcbddc->dbg_flag) { 7944 PetscScalar coarsesum,*array,*array2; 7945 PetscInt i; 7946 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7947 7948 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7949 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7950 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7951 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7952 /* counter */ 7953 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7954 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7955 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7956 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7957 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7958 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7959 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7960 for (i=0;i<pcbddc->local_primal_size;i++) { 7961 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7962 } 7963 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7964 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7965 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7966 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7967 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7968 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7969 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7970 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7971 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7972 for (i=0;i<pcis->n;i++) { 7973 if (array[i] != 0.0 && array[i] != array2[i]) { 7974 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 7975 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 7976 set_error = PETSC_TRUE; 7977 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 7978 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 7979 } 7980 } 7981 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7982 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7983 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7984 for (i=0;i<pcis->n;i++) { 7985 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 7986 } 7987 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7988 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7989 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7990 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7991 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 7992 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 7993 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 7994 PetscInt *gidxs; 7995 7996 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 7997 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 7998 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 7999 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8000 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8001 for (i=0;i<pcbddc->local_primal_size;i++) { 8002 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8003 } 8004 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8005 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8006 } 8007 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8008 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8009 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8010 } 8011 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 8012 /* get back data */ 8013 *coarse_size_n = coarse_size; 8014 *local_primal_indices_n = local_primal_indices; 8015 PetscFunctionReturn(0); 8016 } 8017 8018 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8019 { 8020 IS localis_t; 8021 PetscInt i,lsize,*idxs,n; 8022 PetscScalar *vals; 8023 PetscErrorCode ierr; 8024 8025 PetscFunctionBegin; 8026 /* get indices in local ordering exploiting local to global map */ 8027 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8028 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8029 for (i=0;i<lsize;i++) vals[i] = 1.0; 8030 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8031 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8032 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8033 if (idxs) { /* multilevel guard */ 8034 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8035 } 8036 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8037 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8038 ierr = PetscFree(vals);CHKERRQ(ierr); 8039 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8040 /* now compute set in local ordering */ 8041 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8042 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8043 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8044 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8045 for (i=0,lsize=0;i<n;i++) { 8046 if (PetscRealPart(vals[i]) > 0.5) { 8047 lsize++; 8048 } 8049 } 8050 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8051 for (i=0,lsize=0;i<n;i++) { 8052 if (PetscRealPart(vals[i]) > 0.5) { 8053 idxs[lsize++] = i; 8054 } 8055 } 8056 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8057 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8058 *localis = localis_t; 8059 PetscFunctionReturn(0); 8060 } 8061 8062 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8063 { 8064 PC_IS *pcis=(PC_IS*)pc->data; 8065 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8066 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8067 Mat S_j; 8068 PetscInt *used_xadj,*used_adjncy; 8069 PetscBool free_used_adj; 8070 PetscErrorCode ierr; 8071 8072 PetscFunctionBegin; 8073 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8074 free_used_adj = PETSC_FALSE; 8075 if (pcbddc->sub_schurs_layers == -1) { 8076 used_xadj = NULL; 8077 used_adjncy = NULL; 8078 } else { 8079 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8080 used_xadj = pcbddc->mat_graph->xadj; 8081 used_adjncy = pcbddc->mat_graph->adjncy; 8082 } else if (pcbddc->computed_rowadj) { 8083 used_xadj = pcbddc->mat_graph->xadj; 8084 used_adjncy = pcbddc->mat_graph->adjncy; 8085 } else { 8086 PetscBool flg_row=PETSC_FALSE; 8087 const PetscInt *xadj,*adjncy; 8088 PetscInt nvtxs; 8089 8090 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8091 if (flg_row) { 8092 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8093 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8094 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8095 free_used_adj = PETSC_TRUE; 8096 } else { 8097 pcbddc->sub_schurs_layers = -1; 8098 used_xadj = NULL; 8099 used_adjncy = NULL; 8100 } 8101 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8102 } 8103 } 8104 8105 /* setup sub_schurs data */ 8106 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8107 if (!sub_schurs->schur_explicit) { 8108 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8109 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8110 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8111 } else { 8112 Mat change = NULL; 8113 Vec scaling = NULL; 8114 IS change_primal = NULL, iP; 8115 PetscInt benign_n; 8116 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8117 PetscBool isseqaij,need_change = PETSC_FALSE; 8118 PetscBool discrete_harmonic = PETSC_FALSE; 8119 8120 if (!pcbddc->use_vertices && reuse_solvers) { 8121 PetscInt n_vertices; 8122 8123 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8124 reuse_solvers = (PetscBool)!n_vertices; 8125 } 8126 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8127 if (!isseqaij) { 8128 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8129 if (matis->A == pcbddc->local_mat) { 8130 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8131 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8132 } else { 8133 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8134 } 8135 } 8136 if (!pcbddc->benign_change_explicit) { 8137 benign_n = pcbddc->benign_n; 8138 } else { 8139 benign_n = 0; 8140 } 8141 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8142 We need a global reduction to avoid possible deadlocks. 8143 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8144 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8145 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8146 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8147 need_change = (PetscBool)(!need_change); 8148 } 8149 /* If the user defines additional constraints, we import them here. 8150 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8151 if (need_change) { 8152 PC_IS *pcisf; 8153 PC_BDDC *pcbddcf; 8154 PC pcf; 8155 8156 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8157 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8158 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8159 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8160 8161 /* hacks */ 8162 pcisf = (PC_IS*)pcf->data; 8163 pcisf->is_B_local = pcis->is_B_local; 8164 pcisf->vec1_N = pcis->vec1_N; 8165 pcisf->BtoNmap = pcis->BtoNmap; 8166 pcisf->n = pcis->n; 8167 pcisf->n_B = pcis->n_B; 8168 pcbddcf = (PC_BDDC*)pcf->data; 8169 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8170 pcbddcf->mat_graph = pcbddc->mat_graph; 8171 pcbddcf->use_faces = PETSC_TRUE; 8172 pcbddcf->use_change_of_basis = PETSC_TRUE; 8173 pcbddcf->use_change_on_faces = PETSC_TRUE; 8174 pcbddcf->use_qr_single = PETSC_TRUE; 8175 pcbddcf->fake_change = PETSC_TRUE; 8176 8177 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 8178 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8179 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8180 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8181 change = pcbddcf->ConstraintMatrix; 8182 pcbddcf->ConstraintMatrix = NULL; 8183 8184 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8185 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8186 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8187 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8188 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8189 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8190 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8191 pcf->ops->destroy = NULL; 8192 pcf->ops->reset = NULL; 8193 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8194 } 8195 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8196 8197 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 8198 if (iP) { 8199 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 8200 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 8201 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8202 } 8203 if (discrete_harmonic) { 8204 Mat A; 8205 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 8206 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 8207 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 8208 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8209 ierr = MatDestroy(&A);CHKERRQ(ierr); 8210 } else { 8211 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8212 } 8213 ierr = MatDestroy(&change);CHKERRQ(ierr); 8214 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8215 } 8216 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8217 8218 /* free adjacency */ 8219 if (free_used_adj) { 8220 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8221 } 8222 PetscFunctionReturn(0); 8223 } 8224 8225 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8226 { 8227 PC_IS *pcis=(PC_IS*)pc->data; 8228 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8229 PCBDDCGraph graph; 8230 PetscErrorCode ierr; 8231 8232 PetscFunctionBegin; 8233 /* attach interface graph for determining subsets */ 8234 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8235 IS verticesIS,verticescomm; 8236 PetscInt vsize,*idxs; 8237 8238 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8239 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8240 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8241 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8242 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8243 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8244 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8245 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8246 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8247 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8248 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8249 } else { 8250 graph = pcbddc->mat_graph; 8251 } 8252 /* print some info */ 8253 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8254 IS vertices; 8255 PetscInt nv,nedges,nfaces; 8256 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8257 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8258 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8259 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8260 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8261 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8262 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8263 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8264 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8265 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8266 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8267 } 8268 8269 /* sub_schurs init */ 8270 if (!pcbddc->sub_schurs) { 8271 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8272 } 8273 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8274 pcbddc->sub_schurs->prefix = ((PetscObject)pc)->prefix; 8275 8276 /* free graph struct */ 8277 if (pcbddc->sub_schurs_rebuild) { 8278 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8279 } 8280 PetscFunctionReturn(0); 8281 } 8282 8283 PetscErrorCode PCBDDCCheckOperator(PC pc) 8284 { 8285 PC_IS *pcis=(PC_IS*)pc->data; 8286 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8287 PetscErrorCode ierr; 8288 8289 PetscFunctionBegin; 8290 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8291 IS zerodiag = NULL; 8292 Mat S_j,B0_B=NULL; 8293 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8294 PetscScalar *p0_check,*array,*array2; 8295 PetscReal norm; 8296 PetscInt i; 8297 8298 /* B0 and B0_B */ 8299 if (zerodiag) { 8300 IS dummy; 8301 8302 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8303 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8304 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8305 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8306 } 8307 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8308 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8309 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8310 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8311 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8312 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8313 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8314 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8315 /* S_j */ 8316 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8317 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8318 8319 /* mimic vector in \widetilde{W}_\Gamma */ 8320 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8321 /* continuous in primal space */ 8322 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8323 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8324 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8325 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8326 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8327 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8328 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8329 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8330 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8331 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8332 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8333 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8334 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8335 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8336 8337 /* assemble rhs for coarse problem */ 8338 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8339 /* local with Schur */ 8340 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8341 if (zerodiag) { 8342 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8343 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8344 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8345 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8346 } 8347 /* sum on primal nodes the local contributions */ 8348 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8349 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8350 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8351 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8352 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8353 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8354 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8355 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8356 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8357 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8358 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8359 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8360 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8361 /* scale primal nodes (BDDC sums contibutions) */ 8362 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8363 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8364 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8365 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8366 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8367 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8368 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8369 /* global: \widetilde{B0}_B w_\Gamma */ 8370 if (zerodiag) { 8371 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8372 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8373 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8374 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8375 } 8376 /* BDDC */ 8377 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8378 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8379 8380 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8381 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8382 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8383 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8384 for (i=0;i<pcbddc->benign_n;i++) { 8385 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8386 } 8387 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8388 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8389 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8390 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8391 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8392 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8393 } 8394 PetscFunctionReturn(0); 8395 } 8396 8397 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8398 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8399 { 8400 Mat At; 8401 IS rows; 8402 PetscInt rst,ren; 8403 PetscErrorCode ierr; 8404 PetscLayout rmap; 8405 8406 PetscFunctionBegin; 8407 rst = ren = 0; 8408 if (ccomm != MPI_COMM_NULL) { 8409 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8410 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8411 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8412 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8413 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8414 } 8415 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8416 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8417 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8418 8419 if (ccomm != MPI_COMM_NULL) { 8420 Mat_MPIAIJ *a,*b; 8421 IS from,to; 8422 Vec gvec; 8423 PetscInt lsize; 8424 8425 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8426 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8427 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8428 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8429 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8430 a = (Mat_MPIAIJ*)At->data; 8431 b = (Mat_MPIAIJ*)(*B)->data; 8432 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8433 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8434 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8435 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8436 b->A = a->A; 8437 b->B = a->B; 8438 8439 b->donotstash = a->donotstash; 8440 b->roworiented = a->roworiented; 8441 b->rowindices = 0; 8442 b->rowvalues = 0; 8443 b->getrowactive = PETSC_FALSE; 8444 8445 (*B)->rmap = rmap; 8446 (*B)->factortype = A->factortype; 8447 (*B)->assembled = PETSC_TRUE; 8448 (*B)->insertmode = NOT_SET_VALUES; 8449 (*B)->preallocated = PETSC_TRUE; 8450 8451 if (a->colmap) { 8452 #if defined(PETSC_USE_CTABLE) 8453 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8454 #else 8455 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8456 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8457 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8458 #endif 8459 } else b->colmap = 0; 8460 if (a->garray) { 8461 PetscInt len; 8462 len = a->B->cmap->n; 8463 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8464 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8465 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8466 } else b->garray = 0; 8467 8468 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8469 b->lvec = a->lvec; 8470 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8471 8472 /* cannot use VecScatterCopy */ 8473 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8474 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8475 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8476 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8477 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8478 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8479 ierr = ISDestroy(&from);CHKERRQ(ierr); 8480 ierr = ISDestroy(&to);CHKERRQ(ierr); 8481 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8482 } 8483 ierr = MatDestroy(&At);CHKERRQ(ierr); 8484 PetscFunctionReturn(0); 8485 } 8486