1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscdm.h> 5 #include <petscblaslapack.h> 6 #include <petsc/private/sfimpl.h> 7 8 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 9 10 /* if range is true, it returns B s.t. span{B} = range(A) 11 if range is false, it returns B s.t. range(B) _|_ range(A) */ 12 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 13 { 14 #if !defined(PETSC_USE_COMPLEX) 15 PetscScalar *uwork,*data,*U, ds = 0.; 16 PetscReal *sing; 17 PetscBLASInt bM,bN,lwork,lierr,di = 1; 18 PetscInt ulw,i,nr,nc,n; 19 PetscErrorCode ierr; 20 21 PetscFunctionBegin; 22 #if defined(PETSC_MISSING_LAPACK_GESVD) 23 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 24 #else 25 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 26 if (!nr || !nc) PetscFunctionReturn(0); 27 28 /* workspace */ 29 if (!work) { 30 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 31 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 32 } else { 33 ulw = lw; 34 uwork = work; 35 } 36 n = PetscMin(nr,nc); 37 if (!rwork) { 38 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 39 } else { 40 sing = rwork; 41 } 42 43 /* SVD */ 44 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 46 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 47 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 48 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 49 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 50 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 51 ierr = PetscFPTrapPop();CHKERRQ(ierr); 52 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 53 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 54 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 55 if (!rwork) { 56 ierr = PetscFree(sing);CHKERRQ(ierr); 57 } 58 if (!work) { 59 ierr = PetscFree(uwork);CHKERRQ(ierr); 60 } 61 /* create B */ 62 if (!range) { 63 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 64 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 65 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 66 } else { 67 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 68 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 69 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 70 } 71 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 72 ierr = PetscFree(U);CHKERRQ(ierr); 73 #endif 74 #else /* PETSC_USE_COMPLEX */ 75 PetscFunctionBegin; 76 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 77 #endif 78 PetscFunctionReturn(0); 79 } 80 81 /* TODO REMOVE */ 82 #if defined(PRINT_GDET) 83 static int inc = 0; 84 static int lev = 0; 85 #endif 86 87 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 88 { 89 PetscErrorCode ierr; 90 Mat GE,GEd; 91 PetscInt rsize,csize,esize; 92 PetscScalar *ptr; 93 94 PetscFunctionBegin; 95 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 96 if (!esize) PetscFunctionReturn(0); 97 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 98 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 99 100 /* gradients */ 101 ptr = work + 5*esize; 102 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 103 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 104 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 105 ierr = MatDestroy(&GE);CHKERRQ(ierr); 106 107 /* constants */ 108 ptr += rsize*csize; 109 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 110 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 111 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 112 ierr = MatDestroy(&GE);CHKERRQ(ierr); 113 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 114 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 115 116 if (corners) { 117 Mat GEc; 118 PetscScalar *vals,v; 119 120 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 121 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 122 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 123 /* v = PetscAbsScalar(vals[0]) */; 124 v = 1.; 125 cvals[0] = vals[0]/v; 126 cvals[1] = vals[1]/v; 127 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 128 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 129 #if defined(PRINT_GDET) 130 { 131 PetscViewer viewer; 132 char filename[256]; 133 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 134 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 135 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 136 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 137 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 138 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 139 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 140 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 141 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 142 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 143 } 144 #endif 145 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 146 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 147 } 148 149 PetscFunctionReturn(0); 150 } 151 152 PetscErrorCode PCBDDCNedelecSupport(PC pc) 153 { 154 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 155 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 156 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 157 Vec tvec; 158 PetscSF sfv; 159 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 160 MPI_Comm comm; 161 IS lned,primals,allprimals,nedfieldlocal; 162 IS *eedges,*extrows,*extcols,*alleedges; 163 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 164 PetscScalar *vals,*work; 165 PetscReal *rwork; 166 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 167 PetscInt ne,nv,Lv,order,n,field; 168 PetscInt n_neigh,*neigh,*n_shared,**shared; 169 PetscInt i,j,extmem,cum,maxsize,nee; 170 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 171 PetscInt *sfvleaves,*sfvroots; 172 PetscInt *corners,*cedges; 173 PetscInt *ecount,**eneighs,*vcount,**vneighs; 174 #if defined(PETSC_USE_DEBUG) 175 PetscInt *emarks; 176 #endif 177 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 178 PetscErrorCode ierr; 179 180 PetscFunctionBegin; 181 /* If the discrete gradient is defined for a subset of dofs and global is true, 182 it assumes G is given in global ordering for all the dofs. 183 Otherwise, the ordering is global for the Nedelec field */ 184 order = pcbddc->nedorder; 185 conforming = pcbddc->conforming; 186 field = pcbddc->nedfield; 187 global = pcbddc->nedglobal; 188 setprimal = PETSC_FALSE; 189 print = PETSC_FALSE; 190 singular = PETSC_FALSE; 191 192 /* Command line customization */ 193 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 194 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 195 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 196 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 197 /* print debug info TODO: to be removed */ 198 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 199 ierr = PetscOptionsEnd();CHKERRQ(ierr); 200 201 /* Return if there are no edges in the decomposition and the problem is not singular */ 202 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 203 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 204 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 205 if (!singular) { 206 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 207 lrc[0] = PETSC_FALSE; 208 for (i=0;i<n;i++) { 209 if (PetscRealPart(vals[i]) > 2.) { 210 lrc[0] = PETSC_TRUE; 211 break; 212 } 213 } 214 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 215 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 216 if (!lrc[1]) PetscFunctionReturn(0); 217 } 218 219 /* Get Nedelec field */ 220 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 221 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 222 if (pcbddc->n_ISForDofsLocal && field >= 0) { 223 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 224 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 225 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 226 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 227 ne = n; 228 nedfieldlocal = NULL; 229 global = PETSC_TRUE; 230 } else if (field == PETSC_DECIDE) { 231 PetscInt rst,ren,*idx; 232 233 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 234 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 235 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 236 for (i=rst;i<ren;i++) { 237 PetscInt nc; 238 239 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 240 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 241 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 242 } 243 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 244 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 245 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 246 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 247 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 248 } else { 249 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 250 } 251 252 /* Sanity checks */ 253 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 254 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 255 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 256 257 /* Just set primal dofs and return */ 258 if (setprimal) { 259 IS enedfieldlocal; 260 PetscInt *eidxs; 261 262 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 263 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 264 if (nedfieldlocal) { 265 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 266 for (i=0,cum=0;i<ne;i++) { 267 if (PetscRealPart(vals[idxs[i]]) > 2.) { 268 eidxs[cum++] = idxs[i]; 269 } 270 } 271 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 272 } else { 273 for (i=0,cum=0;i<ne;i++) { 274 if (PetscRealPart(vals[i]) > 2.) { 275 eidxs[cum++] = i; 276 } 277 } 278 } 279 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 280 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 281 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 282 ierr = PetscFree(eidxs);CHKERRQ(ierr); 283 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 284 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 285 PetscFunctionReturn(0); 286 } 287 288 /* Compute some l2g maps */ 289 if (nedfieldlocal) { 290 IS is; 291 292 /* need to map from the local Nedelec field to local numbering */ 293 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 294 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 295 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 296 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 297 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 298 if (global) { 299 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 300 el2g = al2g; 301 } else { 302 IS gis; 303 304 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 305 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 306 ierr = ISDestroy(&gis);CHKERRQ(ierr); 307 } 308 ierr = ISDestroy(&is);CHKERRQ(ierr); 309 } else { 310 /* restore default */ 311 pcbddc->nedfield = -1; 312 /* one ref for the destruction of al2g, one for el2g */ 313 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 314 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 315 el2g = al2g; 316 fl2g = NULL; 317 } 318 319 /* Start communication to drop connections for interior edges (for cc analysis only) */ 320 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 321 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 322 if (nedfieldlocal) { 323 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 324 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 325 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 326 } else { 327 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 328 } 329 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 330 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 331 332 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 333 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 334 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 335 if (global) { 336 PetscInt rst; 337 338 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 339 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 340 if (matis->sf_rootdata[i] < 2) { 341 matis->sf_rootdata[cum++] = i + rst; 342 } 343 } 344 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 345 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 346 } else { 347 PetscInt *tbz; 348 349 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 350 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 351 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 352 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 353 for (i=0,cum=0;i<ne;i++) 354 if (matis->sf_leafdata[idxs[i]] == 1) 355 tbz[cum++] = i; 356 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 357 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 358 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 359 ierr = PetscFree(tbz);CHKERRQ(ierr); 360 } 361 } else { /* we need the entire G to infer the nullspace */ 362 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 363 G = pcbddc->discretegradient; 364 } 365 366 /* Extract subdomain relevant rows of G */ 367 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 368 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 369 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 370 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 371 ierr = ISDestroy(&lned);CHKERRQ(ierr); 372 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 373 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 374 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 375 376 /* SF for nodal dofs communications */ 377 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 378 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 379 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 380 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 381 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 382 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 383 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 384 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 385 i = singular ? 2 : 1; 386 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 387 388 /* Destroy temporary G created in MATIS format and modified G */ 389 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 390 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 391 ierr = MatDestroy(&G);CHKERRQ(ierr); 392 393 if (print) { 394 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 395 ierr = MatView(lG,NULL);CHKERRQ(ierr); 396 } 397 398 /* Save lG for values insertion in change of basis */ 399 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 400 401 /* Analyze the edge-nodes connections (duplicate lG) */ 402 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 403 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 404 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 405 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 406 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 407 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 408 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 409 /* need to import the boundary specification to ensure the 410 proper detection of coarse edges' endpoints */ 411 if (pcbddc->DirichletBoundariesLocal) { 412 IS is; 413 414 if (fl2g) { 415 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 416 } else { 417 is = pcbddc->DirichletBoundariesLocal; 418 } 419 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 420 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 421 for (i=0;i<cum;i++) { 422 if (idxs[i] >= 0) { 423 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 424 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 425 } 426 } 427 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 428 if (fl2g) { 429 ierr = ISDestroy(&is);CHKERRQ(ierr); 430 } 431 } 432 if (pcbddc->NeumannBoundariesLocal) { 433 IS is; 434 435 if (fl2g) { 436 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 437 } else { 438 is = pcbddc->NeumannBoundariesLocal; 439 } 440 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 441 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 442 for (i=0;i<cum;i++) { 443 if (idxs[i] >= 0) { 444 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 445 } 446 } 447 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 448 if (fl2g) { 449 ierr = ISDestroy(&is);CHKERRQ(ierr); 450 } 451 } 452 453 /* Count neighs per dof */ 454 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 455 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 456 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 457 for (i=1,cum=0;i<n_neigh;i++) { 458 cum += n_shared[i]; 459 for (j=0;j<n_shared[i];j++) { 460 ecount[shared[i][j]]++; 461 } 462 } 463 if (ne) { 464 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 465 } 466 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 467 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 468 for (i=1;i<n_neigh;i++) { 469 for (j=0;j<n_shared[i];j++) { 470 PetscInt k = shared[i][j]; 471 eneighs[k][ecount[k]] = neigh[i]; 472 ecount[k]++; 473 } 474 } 475 for (i=0;i<ne;i++) { 476 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 477 } 478 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 479 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 480 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 481 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 482 for (i=1,cum=0;i<n_neigh;i++) { 483 cum += n_shared[i]; 484 for (j=0;j<n_shared[i];j++) { 485 vcount[shared[i][j]]++; 486 } 487 } 488 if (nv) { 489 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 490 } 491 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 492 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 493 for (i=1;i<n_neigh;i++) { 494 for (j=0;j<n_shared[i];j++) { 495 PetscInt k = shared[i][j]; 496 vneighs[k][vcount[k]] = neigh[i]; 497 vcount[k]++; 498 } 499 } 500 for (i=0;i<nv;i++) { 501 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 502 } 503 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 504 505 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 506 for proper detection of coarse edges' endpoints */ 507 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 508 for (i=0;i<ne;i++) { 509 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 510 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 511 } 512 } 513 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 514 if (!conforming) { 515 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 516 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 517 } 518 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 519 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 520 cum = 0; 521 for (i=0;i<ne;i++) { 522 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 523 if (!PetscBTLookup(btee,i)) { 524 marks[cum++] = i; 525 continue; 526 } 527 /* set badly connected edge dofs as primal */ 528 if (!conforming) { 529 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 530 marks[cum++] = i; 531 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 532 for (j=ii[i];j<ii[i+1];j++) { 533 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 534 } 535 } else { 536 /* every edge dofs should be connected trough a certain number of nodal dofs 537 to other edge dofs belonging to coarse edges 538 - at most 2 endpoints 539 - order-1 interior nodal dofs 540 - no undefined nodal dofs (nconn < order) 541 */ 542 PetscInt ends = 0,ints = 0, undef = 0; 543 for (j=ii[i];j<ii[i+1];j++) { 544 PetscInt v = jj[j],k; 545 PetscInt nconn = iit[v+1]-iit[v]; 546 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 547 if (nconn > order) ends++; 548 else if (nconn == order) ints++; 549 else undef++; 550 } 551 if (undef || ends > 2 || ints != order -1) { 552 marks[cum++] = i; 553 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 554 for (j=ii[i];j<ii[i+1];j++) { 555 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 556 } 557 } 558 } 559 } 560 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 561 if (!order && ii[i+1] != ii[i]) { 562 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 563 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 564 } 565 } 566 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 567 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 568 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 569 if (!conforming) { 570 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 571 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 572 } 573 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 574 575 /* identify splitpoints and corner candidates */ 576 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 577 if (print) { 578 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 579 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 580 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 581 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 582 } 583 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 584 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 585 for (i=0;i<nv;i++) { 586 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 587 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 588 if (!order) { /* variable order */ 589 PetscReal vorder = 0.; 590 591 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 592 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 593 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 594 ord = 1; 595 } 596 #if defined(PETSC_USE_DEBUG) 597 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 598 #endif 599 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 600 if (PetscBTLookup(btbd,jj[j])) { 601 bdir = PETSC_TRUE; 602 break; 603 } 604 if (vc != ecount[jj[j]]) { 605 sneighs = PETSC_FALSE; 606 } else { 607 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 608 for (k=0;k<vc;k++) { 609 if (vn[k] != en[k]) { 610 sneighs = PETSC_FALSE; 611 break; 612 } 613 } 614 } 615 } 616 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 617 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 618 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 619 } else if (test == ord) { 620 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 621 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 622 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 623 } else { 624 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 625 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 626 } 627 } 628 } 629 ierr = PetscFree(ecount);CHKERRQ(ierr); 630 ierr = PetscFree(vcount);CHKERRQ(ierr); 631 if (ne) { 632 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 633 } 634 if (nv) { 635 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 636 } 637 ierr = PetscFree(eneighs);CHKERRQ(ierr); 638 ierr = PetscFree(vneighs);CHKERRQ(ierr); 639 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 640 641 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 642 if (order != 1) { 643 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 644 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 645 for (i=0;i<nv;i++) { 646 if (PetscBTLookup(btvcand,i)) { 647 PetscBool found = PETSC_FALSE; 648 for (j=ii[i];j<ii[i+1] && !found;j++) { 649 PetscInt k,e = jj[j]; 650 if (PetscBTLookup(bte,e)) continue; 651 for (k=iit[e];k<iit[e+1];k++) { 652 PetscInt v = jjt[k]; 653 if (v != i && PetscBTLookup(btvcand,v)) { 654 found = PETSC_TRUE; 655 break; 656 } 657 } 658 } 659 if (!found) { 660 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 661 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 662 } else { 663 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 664 } 665 } 666 } 667 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 668 } 669 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 670 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 671 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 672 673 /* Get the local G^T explicitly */ 674 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 675 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 676 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 677 678 /* Mark interior nodal dofs */ 679 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 680 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 681 for (i=1;i<n_neigh;i++) { 682 for (j=0;j<n_shared[i];j++) { 683 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 684 } 685 } 686 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 687 688 /* communicate corners and splitpoints */ 689 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 690 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 691 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 692 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 693 694 if (print) { 695 IS tbz; 696 697 cum = 0; 698 for (i=0;i<nv;i++) 699 if (sfvleaves[i]) 700 vmarks[cum++] = i; 701 702 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 703 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 704 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 705 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 706 } 707 708 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 709 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 710 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 711 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 712 713 /* Zero rows of lGt corresponding to identified corners 714 and interior nodal dofs */ 715 cum = 0; 716 for (i=0;i<nv;i++) { 717 if (sfvleaves[i]) { 718 vmarks[cum++] = i; 719 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 720 } 721 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 722 } 723 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 724 if (print) { 725 IS tbz; 726 727 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 728 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 729 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 730 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 731 } 732 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 733 ierr = PetscFree(vmarks);CHKERRQ(ierr); 734 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 735 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 736 737 /* Recompute G */ 738 ierr = MatDestroy(&lG);CHKERRQ(ierr); 739 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 740 if (print) { 741 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 742 ierr = MatView(lG,NULL);CHKERRQ(ierr); 743 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 744 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 745 } 746 747 /* Get primal dofs (if any) */ 748 cum = 0; 749 for (i=0;i<ne;i++) { 750 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 751 } 752 if (fl2g) { 753 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 754 } 755 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 756 if (print) { 757 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 758 ierr = ISView(primals,NULL);CHKERRQ(ierr); 759 } 760 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 761 /* TODO: what if the user passed in some of them ? */ 762 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 763 ierr = ISDestroy(&primals);CHKERRQ(ierr); 764 765 /* Compute edge connectivity */ 766 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 767 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 768 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 769 if (fl2g) { 770 PetscBT btf; 771 PetscInt *iia,*jja,*iiu,*jju; 772 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 773 774 /* create CSR for all local dofs */ 775 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 776 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 777 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 778 iiu = pcbddc->mat_graph->xadj; 779 jju = pcbddc->mat_graph->adjncy; 780 } else if (pcbddc->use_local_adj) { 781 rest = PETSC_TRUE; 782 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 783 } else { 784 free = PETSC_TRUE; 785 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 786 iiu[0] = 0; 787 for (i=0;i<n;i++) { 788 iiu[i+1] = i+1; 789 jju[i] = -1; 790 } 791 } 792 793 /* import sizes of CSR */ 794 iia[0] = 0; 795 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 796 797 /* overwrite entries corresponding to the Nedelec field */ 798 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 799 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 800 for (i=0;i<ne;i++) { 801 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 802 iia[idxs[i]+1] = ii[i+1]-ii[i]; 803 } 804 805 /* iia in CSR */ 806 for (i=0;i<n;i++) iia[i+1] += iia[i]; 807 808 /* jja in CSR */ 809 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 810 for (i=0;i<n;i++) 811 if (!PetscBTLookup(btf,i)) 812 for (j=0;j<iiu[i+1]-iiu[i];j++) 813 jja[iia[i]+j] = jju[iiu[i]+j]; 814 815 /* map edge dofs connectivity */ 816 if (jj) { 817 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 818 for (i=0;i<ne;i++) { 819 PetscInt e = idxs[i]; 820 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 821 } 822 } 823 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 824 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 825 if (rest) { 826 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 827 } 828 if (free) { 829 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 830 } 831 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 832 } else { 833 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 834 } 835 836 /* Analyze interface for edge dofs */ 837 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 838 pcbddc->mat_graph->twodim = PETSC_FALSE; 839 840 /* Get coarse edges in the edge space */ 841 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 842 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 843 844 if (fl2g) { 845 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 846 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 847 for (i=0;i<nee;i++) { 848 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 849 } 850 } else { 851 eedges = alleedges; 852 primals = allprimals; 853 } 854 855 /* Mark fine edge dofs with their coarse edge id */ 856 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 857 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 858 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 859 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 860 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 861 if (print) { 862 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 863 ierr = ISView(primals,NULL);CHKERRQ(ierr); 864 } 865 866 maxsize = 0; 867 for (i=0;i<nee;i++) { 868 PetscInt size,mark = i+1; 869 870 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 871 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 872 for (j=0;j<size;j++) marks[idxs[j]] = mark; 873 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 874 maxsize = PetscMax(maxsize,size); 875 } 876 877 /* Find coarse edge endpoints */ 878 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 879 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 880 for (i=0;i<nee;i++) { 881 PetscInt mark = i+1,size; 882 883 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 884 if (!size && nedfieldlocal) continue; 885 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 886 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 887 if (print) { 888 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 889 ISView(eedges[i],NULL); 890 } 891 for (j=0;j<size;j++) { 892 PetscInt k, ee = idxs[j]; 893 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 894 for (k=ii[ee];k<ii[ee+1];k++) { 895 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 896 if (PetscBTLookup(btv,jj[k])) { 897 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 898 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 899 PetscInt k2; 900 PetscBool corner = PETSC_FALSE; 901 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 902 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 903 /* it's a corner if either is connected with an edge dof belonging to a different cc or 904 if the edge dof lie on the natural part of the boundary */ 905 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 906 corner = PETSC_TRUE; 907 break; 908 } 909 } 910 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 911 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 912 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 913 } else { 914 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 915 } 916 } 917 } 918 } 919 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 920 } 921 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 922 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 923 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 924 925 /* Reset marked primal dofs */ 926 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 927 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 928 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 929 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 930 931 /* Now use the initial lG */ 932 ierr = MatDestroy(&lG);CHKERRQ(ierr); 933 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 934 lG = lGinit; 935 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 936 937 /* Compute extended cols indices */ 938 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 939 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 940 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 941 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 942 i *= maxsize; 943 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 944 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 945 eerr = PETSC_FALSE; 946 for (i=0;i<nee;i++) { 947 PetscInt size,found = 0; 948 949 cum = 0; 950 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 951 if (!size && nedfieldlocal) continue; 952 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 953 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 954 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 955 for (j=0;j<size;j++) { 956 PetscInt k,ee = idxs[j]; 957 for (k=ii[ee];k<ii[ee+1];k++) { 958 PetscInt vv = jj[k]; 959 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 960 else if (!PetscBTLookupSet(btvc,vv)) found++; 961 } 962 } 963 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 964 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 965 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 966 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 967 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 968 /* it may happen that endpoints are not defined at this point 969 if it is the case, mark this edge for a second pass */ 970 if (cum != size -1 || found != 2) { 971 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 972 if (print) { 973 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 974 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 975 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 976 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 977 } 978 eerr = PETSC_TRUE; 979 } 980 } 981 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 982 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 983 if (done) { 984 PetscInt *newprimals; 985 986 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 987 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 988 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 989 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 990 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 991 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 992 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 993 for (i=0;i<nee;i++) { 994 PetscBool has_candidates = PETSC_FALSE; 995 if (PetscBTLookup(bter,i)) { 996 PetscInt size,mark = i+1; 997 998 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 999 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1000 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1001 for (j=0;j<size;j++) { 1002 PetscInt k,ee = idxs[j]; 1003 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 1004 for (k=ii[ee];k<ii[ee+1];k++) { 1005 /* set all candidates located on the edge as corners */ 1006 if (PetscBTLookup(btvcand,jj[k])) { 1007 PetscInt k2,vv = jj[k]; 1008 has_candidates = PETSC_TRUE; 1009 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 1010 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 1011 /* set all edge dofs connected to candidate as primals */ 1012 for (k2=iit[vv];k2<iit[vv+1];k2++) { 1013 if (marks[jjt[k2]] == mark) { 1014 PetscInt k3,ee2 = jjt[k2]; 1015 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 1016 newprimals[cum++] = ee2; 1017 /* finally set the new corners */ 1018 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 1019 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1020 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1021 } 1022 } 1023 } 1024 } else { 1025 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1026 } 1027 } 1028 } 1029 if (!has_candidates) { /* circular edge */ 1030 PetscInt k, ee = idxs[0],*tmarks; 1031 1032 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1033 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1034 for (k=ii[ee];k<ii[ee+1];k++) { 1035 PetscInt k2; 1036 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1037 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1038 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1039 } 1040 for (j=0;j<size;j++) { 1041 if (tmarks[idxs[j]] > 1) { 1042 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1043 newprimals[cum++] = idxs[j]; 1044 } 1045 } 1046 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1047 } 1048 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1049 } 1050 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1051 } 1052 ierr = PetscFree(extcols);CHKERRQ(ierr); 1053 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1054 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1055 if (fl2g) { 1056 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1057 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1058 for (i=0;i<nee;i++) { 1059 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1060 } 1061 ierr = PetscFree(eedges);CHKERRQ(ierr); 1062 } 1063 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1064 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1065 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1066 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1067 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1068 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1069 pcbddc->mat_graph->twodim = PETSC_FALSE; 1070 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1071 if (fl2g) { 1072 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1073 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1074 for (i=0;i<nee;i++) { 1075 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1076 } 1077 } else { 1078 eedges = alleedges; 1079 primals = allprimals; 1080 } 1081 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1082 1083 /* Mark again */ 1084 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1085 for (i=0;i<nee;i++) { 1086 PetscInt size,mark = i+1; 1087 1088 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1089 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1090 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1091 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1092 } 1093 if (print) { 1094 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1095 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1096 } 1097 1098 /* Recompute extended cols */ 1099 eerr = PETSC_FALSE; 1100 for (i=0;i<nee;i++) { 1101 PetscInt size; 1102 1103 cum = 0; 1104 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1105 if (!size && nedfieldlocal) continue; 1106 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1107 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1108 for (j=0;j<size;j++) { 1109 PetscInt k,ee = idxs[j]; 1110 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1111 } 1112 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1113 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1114 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1115 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1116 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1117 if (cum != size -1) { 1118 if (print) { 1119 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1120 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1121 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1122 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1123 } 1124 eerr = PETSC_TRUE; 1125 } 1126 } 1127 } 1128 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1129 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1130 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1131 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1132 /* an error should not occur at this point */ 1133 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1134 1135 /* Check the number of endpoints */ 1136 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1137 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1138 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1139 for (i=0;i<nee;i++) { 1140 PetscInt size, found = 0, gc[2]; 1141 1142 /* init with defaults */ 1143 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1144 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1145 if (!size && nedfieldlocal) continue; 1146 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1147 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1148 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1149 for (j=0;j<size;j++) { 1150 PetscInt k,ee = idxs[j]; 1151 for (k=ii[ee];k<ii[ee+1];k++) { 1152 PetscInt vv = jj[k]; 1153 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1154 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1155 corners[i*2+found++] = vv; 1156 } 1157 } 1158 } 1159 if (found != 2) { 1160 PetscInt e; 1161 if (fl2g) { 1162 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1163 } else { 1164 e = idxs[0]; 1165 } 1166 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1167 } 1168 1169 /* get primal dof index on this coarse edge */ 1170 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1171 if (gc[0] > gc[1]) { 1172 PetscInt swap = corners[2*i]; 1173 corners[2*i] = corners[2*i+1]; 1174 corners[2*i+1] = swap; 1175 } 1176 cedges[i] = idxs[size-1]; 1177 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1178 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1179 } 1180 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1181 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1182 1183 #if defined(PETSC_USE_DEBUG) 1184 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1185 not interfere with neighbouring coarse edges */ 1186 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1187 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1188 for (i=0;i<nv;i++) { 1189 PetscInt emax = 0,eemax = 0; 1190 1191 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1192 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1193 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1194 for (j=1;j<nee+1;j++) { 1195 if (emax < emarks[j]) { 1196 emax = emarks[j]; 1197 eemax = j; 1198 } 1199 } 1200 /* not relevant for edges */ 1201 if (!eemax) continue; 1202 1203 for (j=ii[i];j<ii[i+1];j++) { 1204 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1205 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1206 } 1207 } 1208 } 1209 ierr = PetscFree(emarks);CHKERRQ(ierr); 1210 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1211 #endif 1212 1213 /* Compute extended rows indices for edge blocks of the change of basis */ 1214 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1215 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1216 extmem *= maxsize; 1217 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1218 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1219 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1220 for (i=0;i<nv;i++) { 1221 PetscInt mark = 0,size,start; 1222 1223 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1224 for (j=ii[i];j<ii[i+1];j++) 1225 if (marks[jj[j]] && !mark) 1226 mark = marks[jj[j]]; 1227 1228 /* not relevant */ 1229 if (!mark) continue; 1230 1231 /* import extended row */ 1232 mark--; 1233 start = mark*extmem+extrowcum[mark]; 1234 size = ii[i+1]-ii[i]; 1235 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1236 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1237 extrowcum[mark] += size; 1238 } 1239 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1240 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1241 ierr = PetscFree(marks);CHKERRQ(ierr); 1242 1243 /* Compress extrows */ 1244 cum = 0; 1245 for (i=0;i<nee;i++) { 1246 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1247 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1248 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1249 cum = PetscMax(cum,size); 1250 } 1251 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1252 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1253 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1254 1255 /* Workspace for lapack inner calls and VecSetValues */ 1256 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1257 1258 /* Create change of basis matrix (preallocation can be improved) */ 1259 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1260 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1261 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1262 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1263 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1264 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1265 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1266 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1267 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1268 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1269 1270 /* Defaults to identity */ 1271 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1272 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1273 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1274 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1275 1276 /* Create discrete gradient for the coarser level if needed */ 1277 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1278 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1279 if (pcbddc->current_level < pcbddc->max_levels) { 1280 ISLocalToGlobalMapping cel2g,cvl2g; 1281 IS wis,gwis; 1282 PetscInt cnv,cne; 1283 1284 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1285 if (fl2g) { 1286 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1287 } else { 1288 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1289 pcbddc->nedclocal = wis; 1290 } 1291 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1292 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1293 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1294 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1295 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1296 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1297 1298 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1299 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1300 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1301 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1302 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1303 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1304 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1305 1306 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1307 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1308 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1309 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1310 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1311 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1312 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1313 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1314 } 1315 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1316 1317 #if defined(PRINT_GDET) 1318 inc = 0; 1319 lev = pcbddc->current_level; 1320 #endif 1321 1322 /* Insert values in the change of basis matrix */ 1323 for (i=0;i<nee;i++) { 1324 Mat Gins = NULL, GKins = NULL; 1325 IS cornersis = NULL; 1326 PetscScalar cvals[2]; 1327 1328 if (pcbddc->nedcG) { 1329 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1330 } 1331 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1332 if (Gins && GKins) { 1333 PetscScalar *data; 1334 const PetscInt *rows,*cols; 1335 PetscInt nrh,nch,nrc,ncc; 1336 1337 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1338 /* H1 */ 1339 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1340 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1341 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1342 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1343 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1344 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1345 /* complement */ 1346 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1347 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1348 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i); 1349 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc); 1350 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1351 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1352 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1353 1354 /* coarse discrete gradient */ 1355 if (pcbddc->nedcG) { 1356 PetscInt cols[2]; 1357 1358 cols[0] = 2*i; 1359 cols[1] = 2*i+1; 1360 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1361 } 1362 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1363 } 1364 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1365 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1366 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1367 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1368 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1369 } 1370 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1371 1372 /* Start assembling */ 1373 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1374 if (pcbddc->nedcG) { 1375 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1376 } 1377 1378 /* Free */ 1379 if (fl2g) { 1380 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1381 for (i=0;i<nee;i++) { 1382 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1383 } 1384 ierr = PetscFree(eedges);CHKERRQ(ierr); 1385 } 1386 1387 /* hack mat_graph with primal dofs on the coarse edges */ 1388 { 1389 PCBDDCGraph graph = pcbddc->mat_graph; 1390 PetscInt *oqueue = graph->queue; 1391 PetscInt *ocptr = graph->cptr; 1392 PetscInt ncc,*idxs; 1393 1394 /* find first primal edge */ 1395 if (pcbddc->nedclocal) { 1396 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1397 } else { 1398 if (fl2g) { 1399 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1400 } 1401 idxs = cedges; 1402 } 1403 cum = 0; 1404 while (cum < nee && cedges[cum] < 0) cum++; 1405 1406 /* adapt connected components */ 1407 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1408 graph->cptr[0] = 0; 1409 for (i=0,ncc=0;i<graph->ncc;i++) { 1410 PetscInt lc = ocptr[i+1]-ocptr[i]; 1411 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1412 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1413 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1414 ncc++; 1415 lc--; 1416 cum++; 1417 while (cum < nee && cedges[cum] < 0) cum++; 1418 } 1419 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1420 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1421 ncc++; 1422 } 1423 graph->ncc = ncc; 1424 if (pcbddc->nedclocal) { 1425 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1426 } 1427 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1428 } 1429 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1430 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1431 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1432 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1433 1434 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1435 ierr = PetscFree(extrow);CHKERRQ(ierr); 1436 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1437 ierr = PetscFree(corners);CHKERRQ(ierr); 1438 ierr = PetscFree(cedges);CHKERRQ(ierr); 1439 ierr = PetscFree(extrows);CHKERRQ(ierr); 1440 ierr = PetscFree(extcols);CHKERRQ(ierr); 1441 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1442 1443 /* Complete assembling */ 1444 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1445 if (pcbddc->nedcG) { 1446 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1447 #if 0 1448 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1449 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1450 #endif 1451 } 1452 1453 /* set change of basis */ 1454 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1455 ierr = MatDestroy(&T);CHKERRQ(ierr); 1456 1457 PetscFunctionReturn(0); 1458 } 1459 1460 /* the near-null space of BDDC carries information on quadrature weights, 1461 and these can be collinear -> so cheat with MatNullSpaceCreate 1462 and create a suitable set of basis vectors first */ 1463 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1464 { 1465 PetscErrorCode ierr; 1466 PetscInt i; 1467 1468 PetscFunctionBegin; 1469 for (i=0;i<nvecs;i++) { 1470 PetscInt first,last; 1471 1472 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1473 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1474 if (i>=first && i < last) { 1475 PetscScalar *data; 1476 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1477 if (!has_const) { 1478 data[i-first] = 1.; 1479 } else { 1480 data[2*i-first] = 1./PetscSqrtReal(2.); 1481 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1482 } 1483 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1484 } 1485 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1486 } 1487 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1488 for (i=0;i<nvecs;i++) { /* reset vectors */ 1489 PetscInt first,last; 1490 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1491 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1492 if (i>=first && i < last) { 1493 PetscScalar *data; 1494 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1495 if (!has_const) { 1496 data[i-first] = 0.; 1497 } else { 1498 data[2*i-first] = 0.; 1499 data[2*i-first+1] = 0.; 1500 } 1501 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1502 } 1503 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1504 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1505 } 1506 PetscFunctionReturn(0); 1507 } 1508 1509 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1510 { 1511 Mat loc_divudotp; 1512 Vec p,v,vins,quad_vec,*quad_vecs; 1513 ISLocalToGlobalMapping map; 1514 IS *faces,*edges; 1515 PetscScalar *vals; 1516 const PetscScalar *array; 1517 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1518 PetscMPIInt rank; 1519 PetscErrorCode ierr; 1520 1521 PetscFunctionBegin; 1522 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1523 if (graph->twodim) { 1524 lmaxneighs = 2; 1525 } else { 1526 lmaxneighs = 1; 1527 for (i=0;i<ne;i++) { 1528 const PetscInt *idxs; 1529 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1530 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1531 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1532 } 1533 lmaxneighs++; /* graph count does not include self */ 1534 } 1535 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1536 maxsize = 0; 1537 for (i=0;i<ne;i++) { 1538 PetscInt nn; 1539 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1540 maxsize = PetscMax(maxsize,nn); 1541 } 1542 for (i=0;i<nf;i++) { 1543 PetscInt nn; 1544 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1545 maxsize = PetscMax(maxsize,nn); 1546 } 1547 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1548 /* create vectors to hold quadrature weights */ 1549 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1550 if (!transpose) { 1551 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1552 } else { 1553 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1554 } 1555 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1556 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1557 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1558 for (i=0;i<maxneighs;i++) { 1559 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1560 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1561 } 1562 1563 /* compute local quad vec */ 1564 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1565 if (!transpose) { 1566 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1567 } else { 1568 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1569 } 1570 ierr = VecSet(p,1.);CHKERRQ(ierr); 1571 if (!transpose) { 1572 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1573 } else { 1574 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1575 } 1576 if (vl2l) { 1577 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1578 } else { 1579 vins = v; 1580 } 1581 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1582 ierr = VecDestroy(&p);CHKERRQ(ierr); 1583 1584 /* insert in global quadrature vecs */ 1585 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1586 for (i=0;i<nf;i++) { 1587 const PetscInt *idxs; 1588 PetscInt idx,nn,j; 1589 1590 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1591 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1592 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1593 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1594 idx = -(idx+1); 1595 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1596 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1597 } 1598 for (i=0;i<ne;i++) { 1599 const PetscInt *idxs; 1600 PetscInt idx,nn,j; 1601 1602 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1603 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1604 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1605 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1606 idx = -(idx+1); 1607 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1608 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1609 } 1610 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1611 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1612 if (vl2l) { 1613 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1614 } 1615 ierr = VecDestroy(&v);CHKERRQ(ierr); 1616 ierr = PetscFree(vals);CHKERRQ(ierr); 1617 1618 /* assemble near null space */ 1619 for (i=0;i<maxneighs;i++) { 1620 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1621 } 1622 for (i=0;i<maxneighs;i++) { 1623 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1624 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1625 } 1626 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1627 PetscFunctionReturn(0); 1628 } 1629 1630 1631 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1632 { 1633 PetscErrorCode ierr; 1634 Vec local,global; 1635 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1636 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1637 PetscBool monolithic = PETSC_FALSE; 1638 1639 PetscFunctionBegin; 1640 /* need to convert from global to local topology information and remove references to information in global ordering */ 1641 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1642 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1643 if (pcbddc->user_provided_isfordofs) { 1644 if (pcbddc->n_ISForDofs) { 1645 PetscInt i; 1646 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1647 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1648 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1649 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1650 } 1651 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1652 pcbddc->n_ISForDofs = 0; 1653 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1654 } 1655 } else { 1656 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1657 DM dm; 1658 1659 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1660 if (!dm) { 1661 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1662 } 1663 if (dm) { 1664 IS *fields; 1665 PetscInt nf,i; 1666 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1667 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1668 for (i=0;i<nf;i++) { 1669 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1670 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1671 } 1672 ierr = PetscFree(fields);CHKERRQ(ierr); 1673 pcbddc->n_ISForDofsLocal = nf; 1674 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1675 PetscContainer c; 1676 1677 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1678 if (c) { 1679 MatISLocalFields lf; 1680 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1681 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1682 } else { /* fallback, create the default fields if bs > 1 */ 1683 PetscInt i, n = matis->A->rmap->n; 1684 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1685 if (i > 1) { 1686 pcbddc->n_ISForDofsLocal = i; 1687 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1688 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1689 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1690 } 1691 } 1692 } 1693 } 1694 } else { 1695 PetscInt i; 1696 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1697 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1698 } 1699 } 1700 } 1701 1702 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1703 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1704 } else if (pcbddc->DirichletBoundariesLocal) { 1705 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1706 } 1707 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1708 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1709 } else if (pcbddc->NeumannBoundariesLocal) { 1710 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1711 } 1712 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1713 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1714 } 1715 ierr = VecDestroy(&global);CHKERRQ(ierr); 1716 ierr = VecDestroy(&local);CHKERRQ(ierr); 1717 1718 PetscFunctionReturn(0); 1719 } 1720 1721 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1722 { 1723 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1724 PetscErrorCode ierr; 1725 IS nis; 1726 const PetscInt *idxs; 1727 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1728 PetscBool *ld; 1729 1730 PetscFunctionBegin; 1731 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1732 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 1733 if (mop == MPI_LAND) { 1734 /* init rootdata with true */ 1735 ld = (PetscBool*) matis->sf_rootdata; 1736 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1737 } else { 1738 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1739 } 1740 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1741 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1742 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1743 ld = (PetscBool*) matis->sf_leafdata; 1744 for (i=0;i<nd;i++) 1745 if (-1 < idxs[i] && idxs[i] < n) 1746 ld[idxs[i]] = PETSC_TRUE; 1747 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1748 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1749 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1750 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1751 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1752 if (mop == MPI_LAND) { 1753 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1754 } else { 1755 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1756 } 1757 for (i=0,nnd=0;i<n;i++) 1758 if (ld[i]) 1759 nidxs[nnd++] = i; 1760 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1761 ierr = ISDestroy(is);CHKERRQ(ierr); 1762 *is = nis; 1763 PetscFunctionReturn(0); 1764 } 1765 1766 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1767 { 1768 PC_IS *pcis = (PC_IS*)(pc->data); 1769 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1770 PetscErrorCode ierr; 1771 1772 PetscFunctionBegin; 1773 if (!pcbddc->benign_have_null) { 1774 PetscFunctionReturn(0); 1775 } 1776 if (pcbddc->ChangeOfBasisMatrix) { 1777 Vec swap; 1778 1779 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1780 swap = pcbddc->work_change; 1781 pcbddc->work_change = r; 1782 r = swap; 1783 } 1784 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1785 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1786 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1787 ierr = VecSet(z,0.);CHKERRQ(ierr); 1788 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1789 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1790 if (pcbddc->ChangeOfBasisMatrix) { 1791 pcbddc->work_change = r; 1792 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1793 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1794 } 1795 PetscFunctionReturn(0); 1796 } 1797 1798 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1799 { 1800 PCBDDCBenignMatMult_ctx ctx; 1801 PetscErrorCode ierr; 1802 PetscBool apply_right,apply_left,reset_x; 1803 1804 PetscFunctionBegin; 1805 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1806 if (transpose) { 1807 apply_right = ctx->apply_left; 1808 apply_left = ctx->apply_right; 1809 } else { 1810 apply_right = ctx->apply_right; 1811 apply_left = ctx->apply_left; 1812 } 1813 reset_x = PETSC_FALSE; 1814 if (apply_right) { 1815 const PetscScalar *ax; 1816 PetscInt nl,i; 1817 1818 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1819 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1820 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1821 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1822 for (i=0;i<ctx->benign_n;i++) { 1823 PetscScalar sum,val; 1824 const PetscInt *idxs; 1825 PetscInt nz,j; 1826 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1827 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1828 sum = 0.; 1829 if (ctx->apply_p0) { 1830 val = ctx->work[idxs[nz-1]]; 1831 for (j=0;j<nz-1;j++) { 1832 sum += ctx->work[idxs[j]]; 1833 ctx->work[idxs[j]] += val; 1834 } 1835 } else { 1836 for (j=0;j<nz-1;j++) { 1837 sum += ctx->work[idxs[j]]; 1838 } 1839 } 1840 ctx->work[idxs[nz-1]] -= sum; 1841 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1842 } 1843 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1844 reset_x = PETSC_TRUE; 1845 } 1846 if (transpose) { 1847 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1848 } else { 1849 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1850 } 1851 if (reset_x) { 1852 ierr = VecResetArray(x);CHKERRQ(ierr); 1853 } 1854 if (apply_left) { 1855 PetscScalar *ay; 1856 PetscInt i; 1857 1858 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1859 for (i=0;i<ctx->benign_n;i++) { 1860 PetscScalar sum,val; 1861 const PetscInt *idxs; 1862 PetscInt nz,j; 1863 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1864 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1865 val = -ay[idxs[nz-1]]; 1866 if (ctx->apply_p0) { 1867 sum = 0.; 1868 for (j=0;j<nz-1;j++) { 1869 sum += ay[idxs[j]]; 1870 ay[idxs[j]] += val; 1871 } 1872 ay[idxs[nz-1]] += sum; 1873 } else { 1874 for (j=0;j<nz-1;j++) { 1875 ay[idxs[j]] += val; 1876 } 1877 ay[idxs[nz-1]] = 0.; 1878 } 1879 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1880 } 1881 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1882 } 1883 PetscFunctionReturn(0); 1884 } 1885 1886 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1887 { 1888 PetscErrorCode ierr; 1889 1890 PetscFunctionBegin; 1891 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1892 PetscFunctionReturn(0); 1893 } 1894 1895 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1896 { 1897 PetscErrorCode ierr; 1898 1899 PetscFunctionBegin; 1900 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1901 PetscFunctionReturn(0); 1902 } 1903 1904 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1905 { 1906 PC_IS *pcis = (PC_IS*)pc->data; 1907 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1908 PCBDDCBenignMatMult_ctx ctx; 1909 PetscErrorCode ierr; 1910 1911 PetscFunctionBegin; 1912 if (!restore) { 1913 Mat A_IB,A_BI; 1914 PetscScalar *work; 1915 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1916 1917 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1918 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1919 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1920 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1921 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1922 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1923 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1924 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1925 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1926 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1927 ctx->apply_left = PETSC_TRUE; 1928 ctx->apply_right = PETSC_FALSE; 1929 ctx->apply_p0 = PETSC_FALSE; 1930 ctx->benign_n = pcbddc->benign_n; 1931 if (reuse) { 1932 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1933 ctx->free = PETSC_FALSE; 1934 } else { /* TODO: could be optimized for successive solves */ 1935 ISLocalToGlobalMapping N_to_D; 1936 PetscInt i; 1937 1938 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1939 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1940 for (i=0;i<pcbddc->benign_n;i++) { 1941 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1942 } 1943 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1944 ctx->free = PETSC_TRUE; 1945 } 1946 ctx->A = pcis->A_IB; 1947 ctx->work = work; 1948 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1949 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1950 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1951 pcis->A_IB = A_IB; 1952 1953 /* A_BI as A_IB^T */ 1954 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1955 pcbddc->benign_original_mat = pcis->A_BI; 1956 pcis->A_BI = A_BI; 1957 } else { 1958 if (!pcbddc->benign_original_mat) { 1959 PetscFunctionReturn(0); 1960 } 1961 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1962 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1963 pcis->A_IB = ctx->A; 1964 ctx->A = NULL; 1965 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1966 pcis->A_BI = pcbddc->benign_original_mat; 1967 pcbddc->benign_original_mat = NULL; 1968 if (ctx->free) { 1969 PetscInt i; 1970 for (i=0;i<ctx->benign_n;i++) { 1971 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1972 } 1973 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1974 } 1975 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1976 ierr = PetscFree(ctx);CHKERRQ(ierr); 1977 } 1978 PetscFunctionReturn(0); 1979 } 1980 1981 /* used just in bddc debug mode */ 1982 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1983 { 1984 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1985 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1986 Mat An; 1987 PetscErrorCode ierr; 1988 1989 PetscFunctionBegin; 1990 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1991 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1992 if (is1) { 1993 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1994 ierr = MatDestroy(&An);CHKERRQ(ierr); 1995 } else { 1996 *B = An; 1997 } 1998 PetscFunctionReturn(0); 1999 } 2000 2001 /* TODO: add reuse flag */ 2002 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2003 { 2004 Mat Bt; 2005 PetscScalar *a,*bdata; 2006 const PetscInt *ii,*ij; 2007 PetscInt m,n,i,nnz,*bii,*bij; 2008 PetscBool flg_row; 2009 PetscErrorCode ierr; 2010 2011 PetscFunctionBegin; 2012 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2013 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2014 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2015 nnz = n; 2016 for (i=0;i<ii[n];i++) { 2017 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2018 } 2019 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2020 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2021 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2022 nnz = 0; 2023 bii[0] = 0; 2024 for (i=0;i<n;i++) { 2025 PetscInt j; 2026 for (j=ii[i];j<ii[i+1];j++) { 2027 PetscScalar entry = a[j]; 2028 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 2029 bij[nnz] = ij[j]; 2030 bdata[nnz] = entry; 2031 nnz++; 2032 } 2033 } 2034 bii[i+1] = nnz; 2035 } 2036 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2037 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2038 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2039 { 2040 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2041 b->free_a = PETSC_TRUE; 2042 b->free_ij = PETSC_TRUE; 2043 } 2044 *B = Bt; 2045 PetscFunctionReturn(0); 2046 } 2047 2048 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 2049 { 2050 Mat B; 2051 IS is_dummy,*cc_n; 2052 ISLocalToGlobalMapping l2gmap_dummy; 2053 PCBDDCGraph graph; 2054 PetscInt i,n; 2055 PetscInt *xadj,*adjncy; 2056 PetscInt *xadj_filtered,*adjncy_filtered; 2057 PetscBool flg_row,isseqaij; 2058 PetscErrorCode ierr; 2059 2060 PetscFunctionBegin; 2061 if (!A->rmap->N || !A->cmap->N) { 2062 *ncc = 0; 2063 *cc = NULL; 2064 PetscFunctionReturn(0); 2065 } 2066 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2067 if (!isseqaij && filter) { 2068 PetscBool isseqdense; 2069 2070 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2071 if (!isseqdense) { 2072 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2073 } else { /* TODO: rectangular case and LDA */ 2074 PetscScalar *array; 2075 PetscReal chop=1.e-6; 2076 2077 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2078 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2079 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2080 for (i=0;i<n;i++) { 2081 PetscInt j; 2082 for (j=i+1;j<n;j++) { 2083 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2084 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2085 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2086 } 2087 } 2088 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2089 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2090 } 2091 } else { 2092 B = A; 2093 } 2094 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2095 2096 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2097 if (filter) { 2098 PetscScalar *data; 2099 PetscInt j,cum; 2100 2101 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2102 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2103 cum = 0; 2104 for (i=0;i<n;i++) { 2105 PetscInt t; 2106 2107 for (j=xadj[i];j<xadj[i+1];j++) { 2108 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2109 continue; 2110 } 2111 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2112 } 2113 t = xadj_filtered[i]; 2114 xadj_filtered[i] = cum; 2115 cum += t; 2116 } 2117 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2118 } else { 2119 xadj_filtered = NULL; 2120 adjncy_filtered = NULL; 2121 } 2122 2123 /* compute local connected components using PCBDDCGraph */ 2124 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2125 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2126 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2127 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2128 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2129 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2130 if (xadj_filtered) { 2131 graph->xadj = xadj_filtered; 2132 graph->adjncy = adjncy_filtered; 2133 } else { 2134 graph->xadj = xadj; 2135 graph->adjncy = adjncy; 2136 } 2137 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2138 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2139 /* partial clean up */ 2140 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2141 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2142 if (A != B) { 2143 ierr = MatDestroy(&B);CHKERRQ(ierr); 2144 } 2145 2146 /* get back data */ 2147 if (ncc) *ncc = graph->ncc; 2148 if (cc) { 2149 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2150 for (i=0;i<graph->ncc;i++) { 2151 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2152 } 2153 *cc = cc_n; 2154 } 2155 /* clean up graph */ 2156 graph->xadj = 0; 2157 graph->adjncy = 0; 2158 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2159 PetscFunctionReturn(0); 2160 } 2161 2162 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2163 { 2164 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2165 PC_IS* pcis = (PC_IS*)(pc->data); 2166 IS dirIS = NULL; 2167 PetscInt i; 2168 PetscErrorCode ierr; 2169 2170 PetscFunctionBegin; 2171 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2172 if (zerodiag) { 2173 Mat A; 2174 Vec vec3_N; 2175 PetscScalar *vals; 2176 const PetscInt *idxs; 2177 PetscInt nz,*count; 2178 2179 /* p0 */ 2180 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2181 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2182 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2183 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2184 for (i=0;i<nz;i++) vals[i] = 1.; 2185 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2186 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2187 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2188 /* v_I */ 2189 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2190 for (i=0;i<nz;i++) vals[i] = 0.; 2191 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2192 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2193 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2194 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2195 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2196 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2197 if (dirIS) { 2198 PetscInt n; 2199 2200 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2201 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2202 for (i=0;i<n;i++) vals[i] = 0.; 2203 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2204 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2205 } 2206 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2207 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2208 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2209 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2210 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2211 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2212 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2213 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2214 ierr = PetscFree(vals);CHKERRQ(ierr); 2215 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2216 2217 /* there should not be any pressure dofs lying on the interface */ 2218 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2219 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2220 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2221 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2222 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2223 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2224 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2225 ierr = PetscFree(count);CHKERRQ(ierr); 2226 } 2227 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2228 2229 /* check PCBDDCBenignGetOrSetP0 */ 2230 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2231 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2232 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2233 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2234 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2235 for (i=0;i<pcbddc->benign_n;i++) { 2236 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2237 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 2238 } 2239 PetscFunctionReturn(0); 2240 } 2241 2242 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2243 { 2244 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2245 IS pressures,zerodiag,zerodiag_save,*zerodiag_subs; 2246 PetscInt nz,n; 2247 PetscInt *interior_dofs,n_interior_dofs,nneu; 2248 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2249 PetscErrorCode ierr; 2250 2251 PetscFunctionBegin; 2252 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2253 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2254 for (n=0;n<pcbddc->benign_n;n++) { 2255 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2256 } 2257 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2258 pcbddc->benign_n = 0; 2259 2260 /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line) 2261 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2262 Checks if all the pressure dofs in each subdomain have a zero diagonal 2263 If not, a change of basis on pressures is not needed 2264 since the local Schur complements are already SPD 2265 */ 2266 has_null_pressures = PETSC_TRUE; 2267 have_null = PETSC_TRUE; 2268 if (pcbddc->n_ISForDofsLocal) { 2269 IS iP = NULL; 2270 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2271 2272 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2273 ierr = PetscOptionsInt("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);CHKERRQ(ierr); 2274 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2275 if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p); 2276 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2277 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2278 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2279 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2280 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2281 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2282 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2283 if (iP) { 2284 IS newpressures; 2285 2286 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2287 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2288 pressures = newpressures; 2289 } 2290 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2291 if (!sorted) { 2292 ierr = ISSort(pressures);CHKERRQ(ierr); 2293 } 2294 } else { 2295 pressures = NULL; 2296 } 2297 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2298 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2299 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2300 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2301 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2302 if (!sorted) { 2303 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2304 } 2305 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2306 zerodiag_save = zerodiag; 2307 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2308 if (!nz) { 2309 if (n) have_null = PETSC_FALSE; 2310 has_null_pressures = PETSC_FALSE; 2311 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2312 } 2313 recompute_zerodiag = PETSC_FALSE; 2314 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2315 zerodiag_subs = NULL; 2316 pcbddc->benign_n = 0; 2317 n_interior_dofs = 0; 2318 interior_dofs = NULL; 2319 nneu = 0; 2320 if (pcbddc->NeumannBoundariesLocal) { 2321 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2322 } 2323 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2324 if (checkb) { /* need to compute interior nodes */ 2325 PetscInt n,i,j; 2326 PetscInt n_neigh,*neigh,*n_shared,**shared; 2327 PetscInt *iwork; 2328 2329 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2330 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2331 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2332 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2333 for (i=1;i<n_neigh;i++) 2334 for (j=0;j<n_shared[i];j++) 2335 iwork[shared[i][j]] += 1; 2336 for (i=0;i<n;i++) 2337 if (!iwork[i]) 2338 interior_dofs[n_interior_dofs++] = i; 2339 ierr = PetscFree(iwork);CHKERRQ(ierr); 2340 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2341 } 2342 if (has_null_pressures) { 2343 IS *subs; 2344 PetscInt nsubs,i,j,nl; 2345 const PetscInt *idxs; 2346 PetscScalar *array; 2347 Vec *work; 2348 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2349 2350 subs = pcbddc->local_subs; 2351 nsubs = pcbddc->n_local_subs; 2352 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2353 if (checkb) { 2354 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2355 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2356 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2357 /* work[0] = 1_p */ 2358 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2359 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2360 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2361 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2362 /* work[0] = 1_v */ 2363 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2364 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2365 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2366 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2367 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2368 } 2369 if (nsubs > 1) { 2370 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2371 for (i=0;i<nsubs;i++) { 2372 ISLocalToGlobalMapping l2g; 2373 IS t_zerodiag_subs; 2374 PetscInt nl; 2375 2376 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2377 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2378 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2379 if (nl) { 2380 PetscBool valid = PETSC_TRUE; 2381 2382 if (checkb) { 2383 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2384 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2385 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2386 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2387 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2388 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2389 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2390 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2391 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2392 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2393 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2394 for (j=0;j<n_interior_dofs;j++) { 2395 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2396 valid = PETSC_FALSE; 2397 break; 2398 } 2399 } 2400 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2401 } 2402 if (valid && nneu) { 2403 const PetscInt *idxs; 2404 PetscInt nzb; 2405 2406 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2407 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2408 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2409 if (nzb) valid = PETSC_FALSE; 2410 } 2411 if (valid && pressures) { 2412 IS t_pressure_subs; 2413 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2414 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2415 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2416 } 2417 if (valid) { 2418 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2419 pcbddc->benign_n++; 2420 } else { 2421 recompute_zerodiag = PETSC_TRUE; 2422 } 2423 } 2424 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2425 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2426 } 2427 } else { /* there's just one subdomain (or zero if they have not been detected */ 2428 PetscBool valid = PETSC_TRUE; 2429 2430 if (nneu) valid = PETSC_FALSE; 2431 if (valid && pressures) { 2432 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2433 } 2434 if (valid && checkb) { 2435 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2436 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2437 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2438 for (j=0;j<n_interior_dofs;j++) { 2439 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2440 valid = PETSC_FALSE; 2441 break; 2442 } 2443 } 2444 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2445 } 2446 if (valid) { 2447 pcbddc->benign_n = 1; 2448 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2449 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2450 zerodiag_subs[0] = zerodiag; 2451 } 2452 } 2453 if (checkb) { 2454 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2455 } 2456 } 2457 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2458 2459 if (!pcbddc->benign_n) { 2460 PetscInt n; 2461 2462 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2463 recompute_zerodiag = PETSC_FALSE; 2464 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2465 if (n) { 2466 has_null_pressures = PETSC_FALSE; 2467 have_null = PETSC_FALSE; 2468 } 2469 } 2470 2471 /* final check for null pressures */ 2472 if (zerodiag && pressures) { 2473 PetscInt nz,np; 2474 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2475 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2476 if (nz != np) have_null = PETSC_FALSE; 2477 } 2478 2479 if (recompute_zerodiag) { 2480 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2481 if (pcbddc->benign_n == 1) { 2482 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2483 zerodiag = zerodiag_subs[0]; 2484 } else { 2485 PetscInt i,nzn,*new_idxs; 2486 2487 nzn = 0; 2488 for (i=0;i<pcbddc->benign_n;i++) { 2489 PetscInt ns; 2490 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2491 nzn += ns; 2492 } 2493 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2494 nzn = 0; 2495 for (i=0;i<pcbddc->benign_n;i++) { 2496 PetscInt ns,*idxs; 2497 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2498 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2499 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2500 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2501 nzn += ns; 2502 } 2503 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2504 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2505 } 2506 have_null = PETSC_FALSE; 2507 } 2508 2509 /* Prepare matrix to compute no-net-flux */ 2510 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2511 Mat A,loc_divudotp; 2512 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2513 IS row,col,isused = NULL; 2514 PetscInt M,N,n,st,n_isused; 2515 2516 if (pressures) { 2517 isused = pressures; 2518 } else { 2519 isused = zerodiag_save; 2520 } 2521 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2522 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2523 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2524 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2525 n_isused = 0; 2526 if (isused) { 2527 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2528 } 2529 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2530 st = st-n_isused; 2531 if (n) { 2532 const PetscInt *gidxs; 2533 2534 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2535 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2536 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2537 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2538 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2539 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2540 } else { 2541 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2542 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2543 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2544 } 2545 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2546 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2547 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2548 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2549 ierr = ISDestroy(&row);CHKERRQ(ierr); 2550 ierr = ISDestroy(&col);CHKERRQ(ierr); 2551 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2552 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2553 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2554 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2555 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2556 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2557 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2558 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2559 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2560 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2561 } 2562 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2563 2564 /* change of basis and p0 dofs */ 2565 if (has_null_pressures) { 2566 IS zerodiagc; 2567 const PetscInt *idxs,*idxsc; 2568 PetscInt i,s,*nnz; 2569 2570 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2571 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2572 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2573 /* local change of basis for pressures */ 2574 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2575 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2576 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2577 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2578 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2579 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2580 for (i=0;i<pcbddc->benign_n;i++) { 2581 PetscInt nzs,j; 2582 2583 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2584 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2585 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2586 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2587 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2588 } 2589 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2590 ierr = PetscFree(nnz);CHKERRQ(ierr); 2591 /* set identity on velocities */ 2592 for (i=0;i<n-nz;i++) { 2593 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2594 } 2595 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2596 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2597 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2598 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2599 /* set change on pressures */ 2600 for (s=0;s<pcbddc->benign_n;s++) { 2601 PetscScalar *array; 2602 PetscInt nzs; 2603 2604 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2605 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2606 for (i=0;i<nzs-1;i++) { 2607 PetscScalar vals[2]; 2608 PetscInt cols[2]; 2609 2610 cols[0] = idxs[i]; 2611 cols[1] = idxs[nzs-1]; 2612 vals[0] = 1.; 2613 vals[1] = 1.; 2614 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2615 } 2616 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2617 for (i=0;i<nzs-1;i++) array[i] = -1.; 2618 array[nzs-1] = 1.; 2619 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2620 /* store local idxs for p0 */ 2621 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2622 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2623 ierr = PetscFree(array);CHKERRQ(ierr); 2624 } 2625 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2626 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2627 /* project if needed */ 2628 if (pcbddc->benign_change_explicit) { 2629 Mat M; 2630 2631 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2632 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2633 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2634 ierr = MatDestroy(&M);CHKERRQ(ierr); 2635 } 2636 /* store global idxs for p0 */ 2637 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2638 } 2639 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2640 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2641 2642 /* determines if the coarse solver will be singular or not */ 2643 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2644 /* determines if the problem has subdomains with 0 pressure block */ 2645 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2646 *zerodiaglocal = zerodiag; 2647 PetscFunctionReturn(0); 2648 } 2649 2650 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2651 { 2652 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2653 PetscScalar *array; 2654 PetscErrorCode ierr; 2655 2656 PetscFunctionBegin; 2657 if (!pcbddc->benign_sf) { 2658 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2659 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2660 } 2661 if (get) { 2662 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2663 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2664 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2665 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2666 } else { 2667 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2668 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2669 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2670 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2671 } 2672 PetscFunctionReturn(0); 2673 } 2674 2675 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2676 { 2677 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2678 PetscErrorCode ierr; 2679 2680 PetscFunctionBegin; 2681 /* TODO: add error checking 2682 - avoid nested pop (or push) calls. 2683 - cannot push before pop. 2684 - cannot call this if pcbddc->local_mat is NULL 2685 */ 2686 if (!pcbddc->benign_n) { 2687 PetscFunctionReturn(0); 2688 } 2689 if (pop) { 2690 if (pcbddc->benign_change_explicit) { 2691 IS is_p0; 2692 MatReuse reuse; 2693 2694 /* extract B_0 */ 2695 reuse = MAT_INITIAL_MATRIX; 2696 if (pcbddc->benign_B0) { 2697 reuse = MAT_REUSE_MATRIX; 2698 } 2699 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2700 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2701 /* remove rows and cols from local problem */ 2702 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2703 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2704 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2705 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2706 } else { 2707 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2708 PetscScalar *vals; 2709 PetscInt i,n,*idxs_ins; 2710 2711 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2712 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2713 if (!pcbddc->benign_B0) { 2714 PetscInt *nnz; 2715 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2716 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2717 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2718 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2719 for (i=0;i<pcbddc->benign_n;i++) { 2720 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2721 nnz[i] = n - nnz[i]; 2722 } 2723 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2724 ierr = PetscFree(nnz);CHKERRQ(ierr); 2725 } 2726 2727 for (i=0;i<pcbddc->benign_n;i++) { 2728 PetscScalar *array; 2729 PetscInt *idxs,j,nz,cum; 2730 2731 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2732 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2733 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2734 for (j=0;j<nz;j++) vals[j] = 1.; 2735 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2736 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2737 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2738 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2739 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2740 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2741 cum = 0; 2742 for (j=0;j<n;j++) { 2743 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2744 vals[cum] = array[j]; 2745 idxs_ins[cum] = j; 2746 cum++; 2747 } 2748 } 2749 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2750 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2751 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2752 } 2753 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2754 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2755 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2756 } 2757 } else { /* push */ 2758 if (pcbddc->benign_change_explicit) { 2759 PetscInt i; 2760 2761 for (i=0;i<pcbddc->benign_n;i++) { 2762 PetscScalar *B0_vals; 2763 PetscInt *B0_cols,B0_ncol; 2764 2765 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2766 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2767 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2768 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2769 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2770 } 2771 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2772 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2773 } else { 2774 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2775 } 2776 } 2777 PetscFunctionReturn(0); 2778 } 2779 2780 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2781 { 2782 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2783 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2784 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2785 PetscBLASInt *B_iwork,*B_ifail; 2786 PetscScalar *work,lwork; 2787 PetscScalar *St,*S,*eigv; 2788 PetscScalar *Sarray,*Starray; 2789 PetscReal *eigs,thresh; 2790 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2791 PetscBool allocated_S_St; 2792 #if defined(PETSC_USE_COMPLEX) 2793 PetscReal *rwork; 2794 #endif 2795 PetscErrorCode ierr; 2796 2797 PetscFunctionBegin; 2798 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2799 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2800 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\nRerun with -sub_schurs_hermitian 1 -sub_schurs_posdef 1 if the problem is SPD",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2801 2802 if (pcbddc->dbg_flag) { 2803 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2804 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2805 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2806 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2807 } 2808 2809 if (pcbddc->dbg_flag) { 2810 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2811 } 2812 2813 /* max size of subsets */ 2814 mss = 0; 2815 for (i=0;i<sub_schurs->n_subs;i++) { 2816 PetscInt subset_size; 2817 2818 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2819 mss = PetscMax(mss,subset_size); 2820 } 2821 2822 /* min/max and threshold */ 2823 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2824 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2825 nmax = PetscMax(nmin,nmax); 2826 allocated_S_St = PETSC_FALSE; 2827 if (nmin) { 2828 allocated_S_St = PETSC_TRUE; 2829 } 2830 2831 /* allocate lapack workspace */ 2832 cum = cum2 = 0; 2833 maxneigs = 0; 2834 for (i=0;i<sub_schurs->n_subs;i++) { 2835 PetscInt n,subset_size; 2836 2837 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2838 n = PetscMin(subset_size,nmax); 2839 cum += subset_size; 2840 cum2 += subset_size*n; 2841 maxneigs = PetscMax(maxneigs,n); 2842 } 2843 if (mss) { 2844 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2845 PetscBLASInt B_itype = 1; 2846 PetscBLASInt B_N = mss; 2847 PetscReal zero = 0.0; 2848 PetscReal eps = 0.0; /* dlamch? */ 2849 2850 B_lwork = -1; 2851 S = NULL; 2852 St = NULL; 2853 eigs = NULL; 2854 eigv = NULL; 2855 B_iwork = NULL; 2856 B_ifail = NULL; 2857 #if defined(PETSC_USE_COMPLEX) 2858 rwork = NULL; 2859 #endif 2860 thresh = 1.0; 2861 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2862 #if defined(PETSC_USE_COMPLEX) 2863 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2864 #else 2865 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2866 #endif 2867 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2868 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2869 } else { 2870 /* TODO */ 2871 } 2872 } else { 2873 lwork = 0; 2874 } 2875 2876 nv = 0; 2877 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2878 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2879 } 2880 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2881 if (allocated_S_St) { 2882 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2883 } 2884 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2885 #if defined(PETSC_USE_COMPLEX) 2886 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2887 #endif 2888 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2889 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2890 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2891 nv+cum,&pcbddc->adaptive_constraints_idxs, 2892 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2893 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2894 2895 maxneigs = 0; 2896 cum = cumarray = 0; 2897 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2898 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2899 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2900 const PetscInt *idxs; 2901 2902 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2903 for (cum=0;cum<nv;cum++) { 2904 pcbddc->adaptive_constraints_n[cum] = 1; 2905 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2906 pcbddc->adaptive_constraints_data[cum] = 1.0; 2907 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2908 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2909 } 2910 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2911 } 2912 2913 if (mss) { /* multilevel */ 2914 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2915 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2916 } 2917 2918 thresh = pcbddc->adaptive_threshold; 2919 for (i=0;i<sub_schurs->n_subs;i++) { 2920 const PetscInt *idxs; 2921 PetscReal upper,lower; 2922 PetscInt j,subset_size,eigs_start = 0; 2923 PetscBLASInt B_N; 2924 PetscBool same_data = PETSC_FALSE; 2925 2926 if (pcbddc->use_deluxe_scaling) { 2927 upper = PETSC_MAX_REAL; 2928 lower = thresh; 2929 } else { 2930 upper = 1./thresh; 2931 lower = 0.; 2932 } 2933 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2934 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2935 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2936 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2937 if (sub_schurs->is_hermitian) { 2938 PetscInt j,k; 2939 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2940 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2941 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2942 } 2943 for (j=0;j<subset_size;j++) { 2944 for (k=j;k<subset_size;k++) { 2945 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2946 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2947 } 2948 } 2949 } else { 2950 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2951 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2952 } 2953 } else { 2954 S = Sarray + cumarray; 2955 St = Starray + cumarray; 2956 } 2957 /* see if we can save some work */ 2958 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2959 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2960 } 2961 2962 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2963 B_neigs = 0; 2964 } else { 2965 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2966 PetscBLASInt B_itype = 1; 2967 PetscBLASInt B_IL, B_IU; 2968 PetscReal eps = -1.0; /* dlamch? */ 2969 PetscInt nmin_s; 2970 PetscBool compute_range = PETSC_FALSE; 2971 2972 if (pcbddc->dbg_flag) { 2973 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]]); 2974 } 2975 2976 compute_range = PETSC_FALSE; 2977 if (thresh > 1.+PETSC_SMALL && !same_data) { 2978 compute_range = PETSC_TRUE; 2979 } 2980 2981 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2982 if (compute_range) { 2983 2984 /* ask for eigenvalues larger than thresh */ 2985 #if defined(PETSC_USE_COMPLEX) 2986 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2987 #else 2988 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2989 #endif 2990 } else if (!same_data) { 2991 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2992 B_IL = 1; 2993 #if defined(PETSC_USE_COMPLEX) 2994 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2995 #else 2996 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2997 #endif 2998 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 2999 PetscInt k; 3000 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3001 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3002 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3003 nmin = nmax; 3004 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 3005 for (k=0;k<nmax;k++) { 3006 eigs[k] = 1./PETSC_SMALL; 3007 eigv[k*(subset_size+1)] = 1.0; 3008 } 3009 } 3010 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3011 if (B_ierr) { 3012 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3013 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3014 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3015 } 3016 3017 if (B_neigs > nmax) { 3018 if (pcbddc->dbg_flag) { 3019 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 3020 } 3021 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 3022 B_neigs = nmax; 3023 } 3024 3025 nmin_s = PetscMin(nmin,B_N); 3026 if (B_neigs < nmin_s) { 3027 PetscBLASInt B_neigs2; 3028 3029 if (pcbddc->use_deluxe_scaling) { 3030 B_IL = B_N - nmin_s + 1; 3031 B_IU = B_N - B_neigs; 3032 } else { 3033 B_IL = B_neigs + 1; 3034 B_IU = nmin_s; 3035 } 3036 if (pcbddc->dbg_flag) { 3037 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 3038 } 3039 if (sub_schurs->is_hermitian) { 3040 PetscInt j,k; 3041 for (j=0;j<subset_size;j++) { 3042 for (k=j;k<subset_size;k++) { 3043 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3044 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3045 } 3046 } 3047 } else { 3048 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3049 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3050 } 3051 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3052 #if defined(PETSC_USE_COMPLEX) 3053 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3054 #else 3055 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3056 #endif 3057 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3058 B_neigs += B_neigs2; 3059 } 3060 if (B_ierr) { 3061 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3062 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3063 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3064 } 3065 if (pcbddc->dbg_flag) { 3066 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3067 for (j=0;j<B_neigs;j++) { 3068 if (eigs[j] == 0.0) { 3069 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3070 } else { 3071 if (pcbddc->use_deluxe_scaling) { 3072 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3073 } else { 3074 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3075 } 3076 } 3077 } 3078 } 3079 } else { 3080 /* TODO */ 3081 } 3082 } 3083 /* change the basis back to the original one */ 3084 if (sub_schurs->change) { 3085 Mat change,phi,phit; 3086 3087 if (pcbddc->dbg_flag > 1) { 3088 PetscInt ii; 3089 for (ii=0;ii<B_neigs;ii++) { 3090 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3091 for (j=0;j<B_N;j++) { 3092 #if defined(PETSC_USE_COMPLEX) 3093 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3094 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3095 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3096 #else 3097 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3098 #endif 3099 } 3100 } 3101 } 3102 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3103 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3104 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3105 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3106 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3107 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3108 } 3109 maxneigs = PetscMax(B_neigs,maxneigs); 3110 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3111 if (B_neigs) { 3112 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3113 3114 if (pcbddc->dbg_flag > 1) { 3115 PetscInt ii; 3116 for (ii=0;ii<B_neigs;ii++) { 3117 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3118 for (j=0;j<B_N;j++) { 3119 #if defined(PETSC_USE_COMPLEX) 3120 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3121 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3122 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3123 #else 3124 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3125 #endif 3126 } 3127 } 3128 } 3129 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3130 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3131 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3132 cum++; 3133 } 3134 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3135 /* shift for next computation */ 3136 cumarray += subset_size*subset_size; 3137 } 3138 if (pcbddc->dbg_flag) { 3139 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3140 } 3141 3142 if (mss) { 3143 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3144 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3145 /* destroy matrices (junk) */ 3146 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3147 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3148 } 3149 if (allocated_S_St) { 3150 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3151 } 3152 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3153 #if defined(PETSC_USE_COMPLEX) 3154 ierr = PetscFree(rwork);CHKERRQ(ierr); 3155 #endif 3156 if (pcbddc->dbg_flag) { 3157 PetscInt maxneigs_r; 3158 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3159 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3160 } 3161 PetscFunctionReturn(0); 3162 } 3163 3164 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3165 { 3166 PetscScalar *coarse_submat_vals; 3167 PetscErrorCode ierr; 3168 3169 PetscFunctionBegin; 3170 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3171 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3172 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3173 3174 /* Setup local neumann solver ksp_R */ 3175 /* PCBDDCSetUpLocalScatters should be called first! */ 3176 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3177 3178 /* 3179 Setup local correction and local part of coarse basis. 3180 Gives back the dense local part of the coarse matrix in column major ordering 3181 */ 3182 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3183 3184 /* Compute total number of coarse nodes and setup coarse solver */ 3185 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3186 3187 /* free */ 3188 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3189 PetscFunctionReturn(0); 3190 } 3191 3192 PetscErrorCode PCBDDCResetCustomization(PC pc) 3193 { 3194 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3195 PetscErrorCode ierr; 3196 3197 PetscFunctionBegin; 3198 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3199 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3200 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3201 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3202 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3203 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3204 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3205 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3206 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3207 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3208 PetscFunctionReturn(0); 3209 } 3210 3211 PetscErrorCode PCBDDCResetTopography(PC pc) 3212 { 3213 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3214 PetscInt i; 3215 PetscErrorCode ierr; 3216 3217 PetscFunctionBegin; 3218 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3219 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3220 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3221 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3222 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3223 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3224 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3225 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3226 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3227 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3228 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3229 for (i=0;i<pcbddc->n_local_subs;i++) { 3230 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3231 } 3232 pcbddc->n_local_subs = 0; 3233 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3234 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3235 pcbddc->graphanalyzed = PETSC_FALSE; 3236 pcbddc->recompute_topography = PETSC_TRUE; 3237 PetscFunctionReturn(0); 3238 } 3239 3240 PetscErrorCode PCBDDCResetSolvers(PC pc) 3241 { 3242 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3243 PetscErrorCode ierr; 3244 3245 PetscFunctionBegin; 3246 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3247 if (pcbddc->coarse_phi_B) { 3248 PetscScalar *array; 3249 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3250 ierr = PetscFree(array);CHKERRQ(ierr); 3251 } 3252 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3253 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3254 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3255 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3256 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3257 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3258 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3259 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3260 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3261 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3262 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3263 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3264 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3265 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3266 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3267 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3268 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3269 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3270 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3271 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3272 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3273 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3274 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3275 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3276 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3277 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3278 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3279 if (pcbddc->benign_zerodiag_subs) { 3280 PetscInt i; 3281 for (i=0;i<pcbddc->benign_n;i++) { 3282 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3283 } 3284 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3285 } 3286 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3287 PetscFunctionReturn(0); 3288 } 3289 3290 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3291 { 3292 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3293 PC_IS *pcis = (PC_IS*)pc->data; 3294 VecType impVecType; 3295 PetscInt n_constraints,n_R,old_size; 3296 PetscErrorCode ierr; 3297 3298 PetscFunctionBegin; 3299 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3300 n_R = pcis->n - pcbddc->n_vertices; 3301 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3302 /* local work vectors (try to avoid unneeded work)*/ 3303 /* R nodes */ 3304 old_size = -1; 3305 if (pcbddc->vec1_R) { 3306 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3307 } 3308 if (n_R != old_size) { 3309 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3310 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3311 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3312 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3313 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3314 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3315 } 3316 /* local primal dofs */ 3317 old_size = -1; 3318 if (pcbddc->vec1_P) { 3319 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3320 } 3321 if (pcbddc->local_primal_size != old_size) { 3322 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3323 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3324 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3325 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3326 } 3327 /* local explicit constraints */ 3328 old_size = -1; 3329 if (pcbddc->vec1_C) { 3330 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3331 } 3332 if (n_constraints && n_constraints != old_size) { 3333 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3334 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3335 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3336 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3337 } 3338 PetscFunctionReturn(0); 3339 } 3340 3341 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3342 { 3343 PetscErrorCode ierr; 3344 /* pointers to pcis and pcbddc */ 3345 PC_IS* pcis = (PC_IS*)pc->data; 3346 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3347 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3348 /* submatrices of local problem */ 3349 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3350 /* submatrices of local coarse problem */ 3351 Mat S_VV,S_CV,S_VC,S_CC; 3352 /* working matrices */ 3353 Mat C_CR; 3354 /* additional working stuff */ 3355 PC pc_R; 3356 Mat F,Brhs = NULL; 3357 Vec dummy_vec; 3358 PetscBool isLU,isCHOL,isILU,need_benign_correction,sparserhs; 3359 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3360 PetscScalar *work; 3361 PetscInt *idx_V_B; 3362 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3363 PetscInt i,n_R,n_D,n_B; 3364 3365 /* some shortcuts to scalars */ 3366 PetscScalar one=1.0,m_one=-1.0; 3367 3368 PetscFunctionBegin; 3369 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3370 3371 /* Set Non-overlapping dimensions */ 3372 n_vertices = pcbddc->n_vertices; 3373 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3374 n_B = pcis->n_B; 3375 n_D = pcis->n - n_B; 3376 n_R = pcis->n - n_vertices; 3377 3378 /* vertices in boundary numbering */ 3379 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3380 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3381 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3382 3383 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3384 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3385 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3386 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3387 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3388 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3389 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3390 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3391 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3392 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3393 3394 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3395 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3396 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3397 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3398 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3399 lda_rhs = n_R; 3400 need_benign_correction = PETSC_FALSE; 3401 if (isLU || isILU || isCHOL) { 3402 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3403 } else if (sub_schurs && sub_schurs->reuse_solver) { 3404 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3405 MatFactorType type; 3406 3407 F = reuse_solver->F; 3408 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3409 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3410 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3411 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3412 } else { 3413 F = NULL; 3414 } 3415 3416 /* determine if we can use a sparse right-hand side */ 3417 sparserhs = PETSC_FALSE; 3418 if (F) { 3419 const MatSolverPackage solver; 3420 3421 ierr = MatFactorGetSolverPackage(F,&solver);CHKERRQ(ierr); 3422 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3423 } 3424 3425 /* allocate workspace */ 3426 n = 0; 3427 if (n_constraints) { 3428 n += lda_rhs*n_constraints; 3429 } 3430 if (n_vertices) { 3431 n = PetscMax(2*lda_rhs*n_vertices,n); 3432 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3433 } 3434 if (!pcbddc->symmetric_primal) { 3435 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3436 } 3437 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3438 3439 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3440 dummy_vec = NULL; 3441 if (need_benign_correction && lda_rhs != n_R && F) { 3442 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3443 } 3444 3445 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3446 if (n_constraints) { 3447 Mat M1,M2,M3,C_B; 3448 IS is_aux; 3449 PetscScalar *array,*array2; 3450 3451 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3452 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3453 3454 /* Extract constraints on R nodes: C_{CR} */ 3455 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3456 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3457 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3458 3459 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3460 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3461 if (!sparserhs) { 3462 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3463 for (i=0;i<n_constraints;i++) { 3464 const PetscScalar *row_cmat_values; 3465 const PetscInt *row_cmat_indices; 3466 PetscInt size_of_constraint,j; 3467 3468 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3469 for (j=0;j<size_of_constraint;j++) { 3470 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3471 } 3472 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3473 } 3474 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 3475 } else { 3476 Mat tC_CR; 3477 3478 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3479 if (lda_rhs != n_R) { 3480 PetscScalar *aa; 3481 PetscInt r,*ii,*jj; 3482 PetscBool done; 3483 3484 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3485 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");CHKERRQ(ierr); 3486 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 3487 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 3488 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3489 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");CHKERRQ(ierr); 3490 } else { 3491 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 3492 tC_CR = C_CR; 3493 } 3494 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 3495 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 3496 } 3497 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3498 if (F) { 3499 if (need_benign_correction) { 3500 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3501 3502 /* rhs is already zero on interior dofs, no need to change the rhs */ 3503 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3504 } 3505 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 3506 if (need_benign_correction) { 3507 PetscScalar *marr; 3508 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3509 3510 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3511 if (lda_rhs != n_R) { 3512 for (i=0;i<n_constraints;i++) { 3513 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3514 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3515 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3516 } 3517 } else { 3518 for (i=0;i<n_constraints;i++) { 3519 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3520 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3521 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3522 } 3523 } 3524 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3525 } 3526 } else { 3527 PetscScalar *marr; 3528 3529 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3530 for (i=0;i<n_constraints;i++) { 3531 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3532 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3533 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3534 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3535 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3536 } 3537 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3538 } 3539 if (sparserhs) { 3540 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3541 } 3542 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 3543 if (!pcbddc->switch_static) { 3544 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3545 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3546 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3547 for (i=0;i<n_constraints;i++) { 3548 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3549 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3550 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3551 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3552 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3553 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3554 } 3555 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3556 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3557 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3558 } else { 3559 if (lda_rhs != n_R) { 3560 IS dummy; 3561 3562 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3563 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3564 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3565 } else { 3566 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3567 pcbddc->local_auxmat2 = local_auxmat2_R; 3568 } 3569 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3570 } 3571 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3572 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3573 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3574 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3575 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3576 if (isCHOL) { 3577 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3578 } else { 3579 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3580 } 3581 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3582 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3583 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3584 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3585 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3586 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3587 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3588 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3589 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3590 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3591 } 3592 3593 /* Get submatrices from subdomain matrix */ 3594 if (n_vertices) { 3595 IS is_aux; 3596 PetscBool isseqaij; 3597 3598 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3599 IS tis; 3600 3601 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3602 ierr = ISSort(tis);CHKERRQ(ierr); 3603 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3604 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3605 } else { 3606 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3607 } 3608 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3609 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3610 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3611 if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */ 3612 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3613 } 3614 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3615 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3616 } 3617 3618 /* Matrix of coarse basis functions (local) */ 3619 if (pcbddc->coarse_phi_B) { 3620 PetscInt on_B,on_primal,on_D=n_D; 3621 if (pcbddc->coarse_phi_D) { 3622 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3623 } 3624 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3625 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3626 PetscScalar *marray; 3627 3628 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3629 ierr = PetscFree(marray);CHKERRQ(ierr); 3630 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3631 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3632 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3633 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3634 } 3635 } 3636 3637 if (!pcbddc->coarse_phi_B) { 3638 PetscScalar *marr; 3639 3640 /* memory size */ 3641 n = n_B*pcbddc->local_primal_size; 3642 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 3643 if (!pcbddc->symmetric_primal) n *= 2; 3644 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 3645 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3646 marr += n_B*pcbddc->local_primal_size; 3647 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3648 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3649 marr += n_D*pcbddc->local_primal_size; 3650 } 3651 if (!pcbddc->symmetric_primal) { 3652 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3653 marr += n_B*pcbddc->local_primal_size; 3654 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3655 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3656 } 3657 } else { 3658 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3659 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3660 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3661 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3662 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3663 } 3664 } 3665 } 3666 3667 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3668 p0_lidx_I = NULL; 3669 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3670 const PetscInt *idxs; 3671 3672 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3673 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3674 for (i=0;i<pcbddc->benign_n;i++) { 3675 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3676 } 3677 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3678 } 3679 3680 /* vertices */ 3681 if (n_vertices) { 3682 PetscBool restoreavr = PETSC_FALSE; 3683 3684 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3685 3686 if (n_R) { 3687 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3688 PetscBLASInt B_N,B_one = 1; 3689 PetscScalar *x,*y; 3690 3691 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3692 if (need_benign_correction) { 3693 ISLocalToGlobalMapping RtoN; 3694 IS is_p0; 3695 PetscInt *idxs_p0,n; 3696 3697 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3698 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3699 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3700 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3701 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3702 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3703 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3704 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3705 } 3706 3707 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3708 if (!sparserhs || need_benign_correction) { 3709 if (lda_rhs == n_R) { 3710 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3711 } else { 3712 PetscScalar *av,*array; 3713 const PetscInt *xadj,*adjncy; 3714 PetscInt n; 3715 PetscBool flg_row; 3716 3717 array = work+lda_rhs*n_vertices; 3718 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3719 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3720 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3721 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3722 for (i=0;i<n;i++) { 3723 PetscInt j; 3724 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3725 } 3726 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3727 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3728 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3729 } 3730 if (need_benign_correction) { 3731 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3732 PetscScalar *marr; 3733 3734 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3735 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3736 3737 | 0 0 0 | (V) 3738 L = | 0 0 -1 | (P-p0) 3739 | 0 0 -1 | (p0) 3740 3741 */ 3742 for (i=0;i<reuse_solver->benign_n;i++) { 3743 const PetscScalar *vals; 3744 const PetscInt *idxs,*idxs_zero; 3745 PetscInt n,j,nz; 3746 3747 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3748 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3749 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3750 for (j=0;j<n;j++) { 3751 PetscScalar val = vals[j]; 3752 PetscInt k,col = idxs[j]; 3753 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3754 } 3755 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3756 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3757 } 3758 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3759 } 3760 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 3761 Brhs = A_RV; 3762 } else { 3763 Mat tA_RVT,A_RVT; 3764 3765 if (!pcbddc->symmetric_primal) { 3766 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 3767 } else { 3768 restoreavr = PETSC_TRUE; 3769 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 3770 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 3771 A_RVT = A_VR; 3772 } 3773 if (lda_rhs != n_R) { 3774 PetscScalar *aa; 3775 PetscInt r,*ii,*jj; 3776 PetscBool done; 3777 3778 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3779 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");CHKERRQ(ierr); 3780 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 3781 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 3782 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3783 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");CHKERRQ(ierr); 3784 } else { 3785 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 3786 tA_RVT = A_RVT; 3787 } 3788 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 3789 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 3790 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 3791 } 3792 if (F) { 3793 /* need to correct the rhs */ 3794 if (need_benign_correction) { 3795 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3796 PetscScalar *marr; 3797 3798 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 3799 if (lda_rhs != n_R) { 3800 for (i=0;i<n_vertices;i++) { 3801 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3802 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3803 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3804 } 3805 } else { 3806 for (i=0;i<n_vertices;i++) { 3807 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3808 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3809 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3810 } 3811 } 3812 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 3813 } 3814 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 3815 if (restoreavr) { 3816 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 3817 } 3818 /* need to correct the solution */ 3819 if (need_benign_correction) { 3820 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3821 PetscScalar *marr; 3822 3823 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3824 if (lda_rhs != n_R) { 3825 for (i=0;i<n_vertices;i++) { 3826 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3827 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3828 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3829 } 3830 } else { 3831 for (i=0;i<n_vertices;i++) { 3832 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3833 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3834 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3835 } 3836 } 3837 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3838 } 3839 } else { 3840 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 3841 for (i=0;i<n_vertices;i++) { 3842 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3843 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3844 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3845 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3846 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3847 } 3848 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 3849 } 3850 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3851 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 3852 /* S_VV and S_CV */ 3853 if (n_constraints) { 3854 Mat B; 3855 3856 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3857 for (i=0;i<n_vertices;i++) { 3858 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3859 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3860 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3861 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3862 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3863 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3864 } 3865 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3866 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3867 ierr = MatDestroy(&B);CHKERRQ(ierr); 3868 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3869 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3870 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3871 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3872 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3873 ierr = MatDestroy(&B);CHKERRQ(ierr); 3874 } 3875 if (lda_rhs != n_R) { 3876 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3877 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3878 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3879 } 3880 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3881 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3882 if (need_benign_correction) { 3883 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3884 PetscScalar *marr,*sums; 3885 3886 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3887 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 3888 for (i=0;i<reuse_solver->benign_n;i++) { 3889 const PetscScalar *vals; 3890 const PetscInt *idxs,*idxs_zero; 3891 PetscInt n,j,nz; 3892 3893 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3894 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3895 for (j=0;j<n_vertices;j++) { 3896 PetscInt k; 3897 sums[j] = 0.; 3898 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3899 } 3900 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3901 for (j=0;j<n;j++) { 3902 PetscScalar val = vals[j]; 3903 PetscInt k; 3904 for (k=0;k<n_vertices;k++) { 3905 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3906 } 3907 } 3908 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3909 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3910 } 3911 ierr = PetscFree(sums);CHKERRQ(ierr); 3912 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 3913 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3914 } 3915 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3916 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3917 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3918 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3919 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3920 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3921 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3922 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3923 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3924 } else { 3925 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3926 } 3927 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3928 3929 /* coarse basis functions */ 3930 for (i=0;i<n_vertices;i++) { 3931 PetscScalar *y; 3932 3933 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3934 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3935 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3936 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3937 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3938 y[n_B*i+idx_V_B[i]] = 1.0; 3939 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3940 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3941 3942 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3943 PetscInt j; 3944 3945 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3946 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3947 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3948 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3949 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3950 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3951 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3952 } 3953 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3954 } 3955 /* if n_R == 0 the object is not destroyed */ 3956 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3957 } 3958 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3959 3960 if (n_constraints) { 3961 Mat B; 3962 3963 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3964 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3965 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3966 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3967 if (n_vertices) { 3968 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3969 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3970 } else { 3971 Mat S_VCt; 3972 3973 if (lda_rhs != n_R) { 3974 ierr = MatDestroy(&B);CHKERRQ(ierr); 3975 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3976 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3977 } 3978 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3979 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3980 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3981 } 3982 } 3983 ierr = MatDestroy(&B);CHKERRQ(ierr); 3984 /* coarse basis functions */ 3985 for (i=0;i<n_constraints;i++) { 3986 PetscScalar *y; 3987 3988 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3989 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3990 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3991 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3992 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3993 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3994 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3995 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3996 PetscInt j; 3997 3998 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3999 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4000 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4001 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4002 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4003 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4004 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4005 } 4006 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4007 } 4008 } 4009 if (n_constraints) { 4010 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4011 } 4012 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4013 4014 /* coarse matrix entries relative to B_0 */ 4015 if (pcbddc->benign_n) { 4016 Mat B0_B,B0_BPHI; 4017 IS is_dummy; 4018 PetscScalar *data; 4019 PetscInt j; 4020 4021 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4022 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4023 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4024 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4025 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4026 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 4027 for (j=0;j<pcbddc->benign_n;j++) { 4028 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4029 for (i=0;i<pcbddc->local_primal_size;i++) { 4030 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4031 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4032 } 4033 } 4034 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 4035 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4036 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4037 } 4038 4039 /* compute other basis functions for non-symmetric problems */ 4040 if (!pcbddc->symmetric_primal) { 4041 Mat B_V=NULL,B_C=NULL; 4042 PetscScalar *marray; 4043 4044 if (n_constraints) { 4045 Mat S_CCT,C_CRT; 4046 4047 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4048 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4049 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4050 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4051 if (n_vertices) { 4052 Mat S_VCT; 4053 4054 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4055 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4056 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4057 } 4058 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4059 } else { 4060 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4061 } 4062 if (n_vertices && n_R) { 4063 PetscScalar *av,*marray; 4064 const PetscInt *xadj,*adjncy; 4065 PetscInt n; 4066 PetscBool flg_row; 4067 4068 /* B_V = B_V - A_VR^T */ 4069 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4070 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4071 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4072 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4073 for (i=0;i<n;i++) { 4074 PetscInt j; 4075 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4076 } 4077 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4078 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4079 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4080 } 4081 4082 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4083 if (n_vertices) { 4084 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4085 for (i=0;i<n_vertices;i++) { 4086 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4087 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4088 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4089 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4090 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4091 } 4092 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4093 } 4094 if (B_C) { 4095 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4096 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4097 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4098 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4099 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4100 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4101 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4102 } 4103 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4104 } 4105 /* coarse basis functions */ 4106 for (i=0;i<pcbddc->local_primal_size;i++) { 4107 PetscScalar *y; 4108 4109 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4110 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4111 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4112 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4113 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4114 if (i<n_vertices) { 4115 y[n_B*i+idx_V_B[i]] = 1.0; 4116 } 4117 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4118 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4119 4120 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4121 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4122 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4123 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4124 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4125 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4126 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4127 } 4128 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4129 } 4130 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4131 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4132 } 4133 4134 /* free memory */ 4135 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4136 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4137 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4138 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4139 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4140 ierr = PetscFree(work);CHKERRQ(ierr); 4141 if (n_vertices) { 4142 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4143 } 4144 if (n_constraints) { 4145 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4146 } 4147 /* Checking coarse_sub_mat and coarse basis functios */ 4148 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4149 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4150 if (pcbddc->dbg_flag) { 4151 Mat coarse_sub_mat; 4152 Mat AUXMAT,TM1,TM2,TM3,TM4; 4153 Mat coarse_phi_D,coarse_phi_B; 4154 Mat coarse_psi_D,coarse_psi_B; 4155 Mat A_II,A_BB,A_IB,A_BI; 4156 Mat C_B,CPHI; 4157 IS is_dummy; 4158 Vec mones; 4159 MatType checkmattype=MATSEQAIJ; 4160 PetscReal real_value; 4161 4162 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4163 Mat A; 4164 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4165 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4166 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4167 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4168 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4169 ierr = MatDestroy(&A);CHKERRQ(ierr); 4170 } else { 4171 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4172 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4173 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4174 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4175 } 4176 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4177 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4178 if (!pcbddc->symmetric_primal) { 4179 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4180 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4181 } 4182 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4183 4184 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4185 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4186 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4187 if (!pcbddc->symmetric_primal) { 4188 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4189 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4190 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4191 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4192 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4193 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4194 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4195 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4196 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4197 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4198 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4199 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4200 } else { 4201 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4202 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4203 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4204 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4205 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4206 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4207 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4208 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4209 } 4210 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4211 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4212 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4213 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4214 if (pcbddc->benign_n) { 4215 Mat B0_B,B0_BPHI; 4216 PetscScalar *data,*data2; 4217 PetscInt j; 4218 4219 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4220 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4221 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4222 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4223 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4224 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4225 for (j=0;j<pcbddc->benign_n;j++) { 4226 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4227 for (i=0;i<pcbddc->local_primal_size;i++) { 4228 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4229 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4230 } 4231 } 4232 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4233 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4234 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4235 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4236 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4237 } 4238 #if 0 4239 { 4240 PetscViewer viewer; 4241 char filename[256]; 4242 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4243 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4244 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4245 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4246 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4247 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4248 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4249 if (save_change) { 4250 Mat phi_B; 4251 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 4252 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 4253 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 4254 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 4255 } else { 4256 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4257 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4258 } 4259 if (pcbddc->coarse_phi_D) { 4260 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4261 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4262 } 4263 if (pcbddc->coarse_psi_B) { 4264 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4265 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4266 } 4267 if (pcbddc->coarse_psi_D) { 4268 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4269 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4270 } 4271 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4272 } 4273 #endif 4274 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4275 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4276 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4277 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4278 4279 /* check constraints */ 4280 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4281 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4282 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4283 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4284 } else { 4285 PetscScalar *data; 4286 Mat tmat; 4287 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4288 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4289 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4290 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4291 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4292 } 4293 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4294 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4295 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4296 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4297 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4298 if (!pcbddc->symmetric_primal) { 4299 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4300 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4301 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4302 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4303 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4304 } 4305 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4306 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4307 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4308 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4309 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4310 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4311 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4312 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4313 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4314 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4315 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4316 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4317 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4318 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4319 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4320 if (!pcbddc->symmetric_primal) { 4321 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4322 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4323 } 4324 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4325 } 4326 /* get back data */ 4327 *coarse_submat_vals_n = coarse_submat_vals; 4328 PetscFunctionReturn(0); 4329 } 4330 4331 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4332 { 4333 Mat *work_mat; 4334 IS isrow_s,iscol_s; 4335 PetscBool rsorted,csorted; 4336 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4337 PetscErrorCode ierr; 4338 4339 PetscFunctionBegin; 4340 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4341 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4342 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4343 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4344 4345 if (!rsorted) { 4346 const PetscInt *idxs; 4347 PetscInt *idxs_sorted,i; 4348 4349 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4350 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4351 for (i=0;i<rsize;i++) { 4352 idxs_perm_r[i] = i; 4353 } 4354 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4355 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4356 for (i=0;i<rsize;i++) { 4357 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4358 } 4359 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4360 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4361 } else { 4362 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4363 isrow_s = isrow; 4364 } 4365 4366 if (!csorted) { 4367 if (isrow == iscol) { 4368 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4369 iscol_s = isrow_s; 4370 } else { 4371 const PetscInt *idxs; 4372 PetscInt *idxs_sorted,i; 4373 4374 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4375 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4376 for (i=0;i<csize;i++) { 4377 idxs_perm_c[i] = i; 4378 } 4379 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4380 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4381 for (i=0;i<csize;i++) { 4382 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4383 } 4384 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4385 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4386 } 4387 } else { 4388 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4389 iscol_s = iscol; 4390 } 4391 4392 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4393 4394 if (!rsorted || !csorted) { 4395 Mat new_mat; 4396 IS is_perm_r,is_perm_c; 4397 4398 if (!rsorted) { 4399 PetscInt *idxs_r,i; 4400 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4401 for (i=0;i<rsize;i++) { 4402 idxs_r[idxs_perm_r[i]] = i; 4403 } 4404 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4405 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4406 } else { 4407 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4408 } 4409 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4410 4411 if (!csorted) { 4412 if (isrow_s == iscol_s) { 4413 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4414 is_perm_c = is_perm_r; 4415 } else { 4416 PetscInt *idxs_c,i; 4417 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4418 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4419 for (i=0;i<csize;i++) { 4420 idxs_c[idxs_perm_c[i]] = i; 4421 } 4422 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4423 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4424 } 4425 } else { 4426 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4427 } 4428 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4429 4430 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4431 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4432 work_mat[0] = new_mat; 4433 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4434 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4435 } 4436 4437 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4438 *B = work_mat[0]; 4439 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4440 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4441 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4442 PetscFunctionReturn(0); 4443 } 4444 4445 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4446 { 4447 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4448 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4449 Mat new_mat,lA; 4450 IS is_local,is_global; 4451 PetscInt local_size; 4452 PetscBool isseqaij; 4453 PetscErrorCode ierr; 4454 4455 PetscFunctionBegin; 4456 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4457 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4458 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4459 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4460 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4461 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4462 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4463 4464 /* check */ 4465 if (pcbddc->dbg_flag) { 4466 Vec x,x_change; 4467 PetscReal error; 4468 4469 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4470 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4471 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4472 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4473 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4474 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4475 if (!pcbddc->change_interior) { 4476 const PetscScalar *x,*y,*v; 4477 PetscReal lerror = 0.; 4478 PetscInt i; 4479 4480 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4481 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4482 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4483 for (i=0;i<local_size;i++) 4484 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4485 lerror = PetscAbsScalar(x[i]-y[i]); 4486 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4487 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4488 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4489 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4490 if (error > PETSC_SMALL) { 4491 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4492 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4493 } else { 4494 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4495 } 4496 } 4497 } 4498 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4499 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4500 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4501 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4502 if (error > PETSC_SMALL) { 4503 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4504 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4505 } else { 4506 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4507 } 4508 } 4509 ierr = VecDestroy(&x);CHKERRQ(ierr); 4510 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4511 } 4512 4513 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4514 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 4515 4516 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4517 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4518 if (isseqaij) { 4519 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4520 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4521 if (lA) { 4522 Mat work; 4523 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4524 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4525 ierr = MatDestroy(&work);CHKERRQ(ierr); 4526 } 4527 } else { 4528 Mat work_mat; 4529 4530 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4531 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4532 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4533 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4534 if (lA) { 4535 Mat work; 4536 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4537 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4538 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4539 ierr = MatDestroy(&work);CHKERRQ(ierr); 4540 } 4541 } 4542 if (matis->A->symmetric_set) { 4543 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4544 #if !defined(PETSC_USE_COMPLEX) 4545 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4546 #endif 4547 } 4548 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4549 PetscFunctionReturn(0); 4550 } 4551 4552 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4553 { 4554 PC_IS* pcis = (PC_IS*)(pc->data); 4555 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4556 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4557 PetscInt *idx_R_local=NULL; 4558 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4559 PetscInt vbs,bs; 4560 PetscBT bitmask=NULL; 4561 PetscErrorCode ierr; 4562 4563 PetscFunctionBegin; 4564 /* 4565 No need to setup local scatters if 4566 - primal space is unchanged 4567 AND 4568 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4569 AND 4570 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4571 */ 4572 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4573 PetscFunctionReturn(0); 4574 } 4575 /* destroy old objects */ 4576 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4577 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4578 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4579 /* Set Non-overlapping dimensions */ 4580 n_B = pcis->n_B; 4581 n_D = pcis->n - n_B; 4582 n_vertices = pcbddc->n_vertices; 4583 4584 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4585 4586 /* create auxiliary bitmask and allocate workspace */ 4587 if (!sub_schurs || !sub_schurs->reuse_solver) { 4588 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4589 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4590 for (i=0;i<n_vertices;i++) { 4591 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4592 } 4593 4594 for (i=0, n_R=0; i<pcis->n; i++) { 4595 if (!PetscBTLookup(bitmask,i)) { 4596 idx_R_local[n_R++] = i; 4597 } 4598 } 4599 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4600 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4601 4602 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4603 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4604 } 4605 4606 /* Block code */ 4607 vbs = 1; 4608 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4609 if (bs>1 && !(n_vertices%bs)) { 4610 PetscBool is_blocked = PETSC_TRUE; 4611 PetscInt *vary; 4612 if (!sub_schurs || !sub_schurs->reuse_solver) { 4613 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4614 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4615 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4616 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4617 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4618 for (i=0; i<pcis->n/bs; i++) { 4619 if (vary[i]!=0 && vary[i]!=bs) { 4620 is_blocked = PETSC_FALSE; 4621 break; 4622 } 4623 } 4624 ierr = PetscFree(vary);CHKERRQ(ierr); 4625 } else { 4626 /* Verify directly the R set */ 4627 for (i=0; i<n_R/bs; i++) { 4628 PetscInt j,node=idx_R_local[bs*i]; 4629 for (j=1; j<bs; j++) { 4630 if (node != idx_R_local[bs*i+j]-j) { 4631 is_blocked = PETSC_FALSE; 4632 break; 4633 } 4634 } 4635 } 4636 } 4637 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4638 vbs = bs; 4639 for (i=0;i<n_R/vbs;i++) { 4640 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4641 } 4642 } 4643 } 4644 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4645 if (sub_schurs && sub_schurs->reuse_solver) { 4646 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4647 4648 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4649 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4650 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4651 reuse_solver->is_R = pcbddc->is_R_local; 4652 } else { 4653 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4654 } 4655 4656 /* print some info if requested */ 4657 if (pcbddc->dbg_flag) { 4658 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4659 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4660 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4661 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4662 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4663 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4664 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4665 } 4666 4667 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4668 if (!sub_schurs || !sub_schurs->reuse_solver) { 4669 IS is_aux1,is_aux2; 4670 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4671 4672 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4673 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4674 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4675 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4676 for (i=0; i<n_D; i++) { 4677 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4678 } 4679 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4680 for (i=0, j=0; i<n_R; i++) { 4681 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4682 aux_array1[j++] = i; 4683 } 4684 } 4685 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4686 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4687 for (i=0, j=0; i<n_B; i++) { 4688 if (!PetscBTLookup(bitmask,is_indices[i])) { 4689 aux_array2[j++] = i; 4690 } 4691 } 4692 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4693 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4694 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4695 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4696 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4697 4698 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4699 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4700 for (i=0, j=0; i<n_R; i++) { 4701 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4702 aux_array1[j++] = i; 4703 } 4704 } 4705 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4706 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4707 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4708 } 4709 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4710 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4711 } else { 4712 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4713 IS tis; 4714 PetscInt schur_size; 4715 4716 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4717 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4718 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4719 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4720 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4721 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4722 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4723 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4724 } 4725 } 4726 PetscFunctionReturn(0); 4727 } 4728 4729 4730 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4731 { 4732 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4733 PC_IS *pcis = (PC_IS*)pc->data; 4734 PC pc_temp; 4735 Mat A_RR; 4736 MatReuse reuse; 4737 PetscScalar m_one = -1.0; 4738 PetscReal value; 4739 PetscInt n_D,n_R; 4740 PetscBool check_corr[2],issbaij; 4741 PetscErrorCode ierr; 4742 /* prefixes stuff */ 4743 char dir_prefix[256],neu_prefix[256],str_level[16]; 4744 size_t len; 4745 4746 PetscFunctionBegin; 4747 4748 /* compute prefixes */ 4749 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4750 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4751 if (!pcbddc->current_level) { 4752 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4753 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4754 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4755 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4756 } else { 4757 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4758 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4759 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4760 len -= 15; /* remove "pc_bddc_coarse_" */ 4761 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4762 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4763 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4764 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4765 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4766 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4767 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4768 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4769 } 4770 4771 /* DIRICHLET PROBLEM */ 4772 if (dirichlet) { 4773 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4774 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4775 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4776 if (pcbddc->dbg_flag) { 4777 Mat A_IIn; 4778 4779 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4780 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4781 pcis->A_II = A_IIn; 4782 } 4783 } 4784 if (pcbddc->local_mat->symmetric_set) { 4785 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4786 } 4787 /* Matrix for Dirichlet problem is pcis->A_II */ 4788 n_D = pcis->n - pcis->n_B; 4789 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4790 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4791 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4792 /* default */ 4793 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4794 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4795 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4796 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4797 if (issbaij) { 4798 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4799 } else { 4800 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4801 } 4802 /* Allow user's customization */ 4803 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4804 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4805 } 4806 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4807 if (sub_schurs && sub_schurs->reuse_solver) { 4808 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4809 4810 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4811 } 4812 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4813 if (!n_D) { 4814 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4815 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4816 } 4817 /* Set Up KSP for Dirichlet problem of BDDC */ 4818 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4819 /* set ksp_D into pcis data */ 4820 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4821 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4822 pcis->ksp_D = pcbddc->ksp_D; 4823 } 4824 4825 /* NEUMANN PROBLEM */ 4826 A_RR = 0; 4827 if (neumann) { 4828 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4829 PetscInt ibs,mbs; 4830 PetscBool issbaij; 4831 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4832 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4833 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4834 if (pcbddc->ksp_R) { /* already created ksp */ 4835 PetscInt nn_R; 4836 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4837 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4838 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4839 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4840 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4841 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4842 reuse = MAT_INITIAL_MATRIX; 4843 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4844 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4845 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4846 reuse = MAT_INITIAL_MATRIX; 4847 } else { /* safe to reuse the matrix */ 4848 reuse = MAT_REUSE_MATRIX; 4849 } 4850 } 4851 /* last check */ 4852 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4853 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4854 reuse = MAT_INITIAL_MATRIX; 4855 } 4856 } else { /* first time, so we need to create the matrix */ 4857 reuse = MAT_INITIAL_MATRIX; 4858 } 4859 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4860 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4861 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4862 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4863 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4864 if (matis->A == pcbddc->local_mat) { 4865 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4866 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4867 } else { 4868 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4869 } 4870 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4871 if (matis->A == pcbddc->local_mat) { 4872 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4873 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4874 } else { 4875 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4876 } 4877 } 4878 /* extract A_RR */ 4879 if (sub_schurs && sub_schurs->reuse_solver) { 4880 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4881 4882 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4883 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4884 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4885 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4886 } else { 4887 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4888 } 4889 } else { 4890 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4891 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4892 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4893 } 4894 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4895 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4896 } 4897 if (pcbddc->local_mat->symmetric_set) { 4898 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4899 } 4900 if (!pcbddc->ksp_R) { /* create object if not present */ 4901 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4902 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4903 /* default */ 4904 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4905 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4906 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4907 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4908 if (issbaij) { 4909 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4910 } else { 4911 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4912 } 4913 /* Allow user's customization */ 4914 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4915 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4916 } 4917 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4918 if (!n_R) { 4919 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4920 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4921 } 4922 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4923 /* Reuse solver if it is present */ 4924 if (sub_schurs && sub_schurs->reuse_solver && sub_schurs->A == pcbddc->local_mat) { 4925 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4926 4927 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4928 } 4929 /* Set Up KSP for Neumann problem of BDDC */ 4930 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4931 } 4932 4933 if (pcbddc->dbg_flag) { 4934 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4935 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4936 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4937 } 4938 4939 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4940 check_corr[0] = check_corr[1] = PETSC_FALSE; 4941 if (pcbddc->NullSpace_corr[0]) { 4942 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4943 } 4944 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4945 check_corr[0] = PETSC_TRUE; 4946 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4947 } 4948 if (neumann && pcbddc->NullSpace_corr[2]) { 4949 check_corr[1] = PETSC_TRUE; 4950 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4951 } 4952 4953 /* check Dirichlet and Neumann solvers */ 4954 if (pcbddc->dbg_flag) { 4955 if (dirichlet) { /* Dirichlet */ 4956 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4957 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4958 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4959 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4960 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4961 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4962 if (check_corr[0]) { 4963 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4964 } 4965 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4966 } 4967 if (neumann) { /* Neumann */ 4968 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4969 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4970 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4971 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4972 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4973 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4974 if (check_corr[1]) { 4975 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4976 } 4977 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4978 } 4979 } 4980 /* free Neumann problem's matrix */ 4981 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4982 PetscFunctionReturn(0); 4983 } 4984 4985 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4986 { 4987 PetscErrorCode ierr; 4988 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4989 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4990 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4991 4992 PetscFunctionBegin; 4993 if (!reuse_solver) { 4994 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4995 } 4996 if (!pcbddc->switch_static) { 4997 if (applytranspose && pcbddc->local_auxmat1) { 4998 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4999 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5000 } 5001 if (!reuse_solver) { 5002 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5003 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5004 } else { 5005 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5006 5007 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5008 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5009 } 5010 } else { 5011 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5012 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5013 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5014 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5015 if (applytranspose && pcbddc->local_auxmat1) { 5016 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5017 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5018 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5019 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5020 } 5021 } 5022 if (!reuse_solver || pcbddc->switch_static) { 5023 if (applytranspose) { 5024 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5025 } else { 5026 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5027 } 5028 } else { 5029 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5030 5031 if (applytranspose) { 5032 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5033 } else { 5034 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5035 } 5036 } 5037 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5038 if (!pcbddc->switch_static) { 5039 if (!reuse_solver) { 5040 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5041 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5042 } else { 5043 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5044 5045 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5046 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5047 } 5048 if (!applytranspose && pcbddc->local_auxmat1) { 5049 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5050 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5051 } 5052 } else { 5053 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5054 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5055 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5056 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5057 if (!applytranspose && pcbddc->local_auxmat1) { 5058 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5059 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5060 } 5061 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5062 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5063 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5064 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5065 } 5066 PetscFunctionReturn(0); 5067 } 5068 5069 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5070 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5071 { 5072 PetscErrorCode ierr; 5073 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5074 PC_IS* pcis = (PC_IS*) (pc->data); 5075 const PetscScalar zero = 0.0; 5076 5077 PetscFunctionBegin; 5078 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5079 if (!pcbddc->benign_apply_coarse_only) { 5080 if (applytranspose) { 5081 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5082 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5083 } else { 5084 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5085 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5086 } 5087 } else { 5088 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5089 } 5090 5091 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5092 if (pcbddc->benign_n) { 5093 PetscScalar *array; 5094 PetscInt j; 5095 5096 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5097 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5098 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5099 } 5100 5101 /* start communications from local primal nodes to rhs of coarse solver */ 5102 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5103 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5104 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5105 5106 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5107 if (pcbddc->coarse_ksp) { 5108 Mat coarse_mat; 5109 Vec rhs,sol; 5110 MatNullSpace nullsp; 5111 PetscBool isbddc = PETSC_FALSE; 5112 5113 if (pcbddc->benign_have_null) { 5114 PC coarse_pc; 5115 5116 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5117 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5118 /* we need to propagate to coarser levels the need for a possible benign correction */ 5119 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5120 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5121 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5122 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5123 } 5124 } 5125 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5126 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5127 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5128 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5129 if (nullsp) { 5130 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5131 } 5132 if (applytranspose) { 5133 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5134 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5135 } else { 5136 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5137 PC coarse_pc; 5138 5139 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5140 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5141 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5142 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5143 } else { 5144 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5145 } 5146 } 5147 /* we don't need the benign correction at coarser levels anymore */ 5148 if (pcbddc->benign_have_null && isbddc) { 5149 PC coarse_pc; 5150 PC_BDDC* coarsepcbddc; 5151 5152 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5153 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5154 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5155 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5156 } 5157 if (nullsp) { 5158 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5159 } 5160 } 5161 5162 /* Local solution on R nodes */ 5163 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5164 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5165 } 5166 /* communications from coarse sol to local primal nodes */ 5167 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5168 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5169 5170 /* Sum contributions from the two levels */ 5171 if (!pcbddc->benign_apply_coarse_only) { 5172 if (applytranspose) { 5173 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5174 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5175 } else { 5176 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5177 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5178 } 5179 /* store p0 */ 5180 if (pcbddc->benign_n) { 5181 PetscScalar *array; 5182 PetscInt j; 5183 5184 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5185 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5186 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5187 } 5188 } else { /* expand the coarse solution */ 5189 if (applytranspose) { 5190 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5191 } else { 5192 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5193 } 5194 } 5195 PetscFunctionReturn(0); 5196 } 5197 5198 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5199 { 5200 PetscErrorCode ierr; 5201 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5202 PetscScalar *array; 5203 Vec from,to; 5204 5205 PetscFunctionBegin; 5206 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5207 from = pcbddc->coarse_vec; 5208 to = pcbddc->vec1_P; 5209 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5210 Vec tvec; 5211 5212 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5213 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5214 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5215 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5216 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5217 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5218 } 5219 } else { /* from local to global -> put data in coarse right hand side */ 5220 from = pcbddc->vec1_P; 5221 to = pcbddc->coarse_vec; 5222 } 5223 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5224 PetscFunctionReturn(0); 5225 } 5226 5227 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5228 { 5229 PetscErrorCode ierr; 5230 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5231 PetscScalar *array; 5232 Vec from,to; 5233 5234 PetscFunctionBegin; 5235 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5236 from = pcbddc->coarse_vec; 5237 to = pcbddc->vec1_P; 5238 } else { /* from local to global -> put data in coarse right hand side */ 5239 from = pcbddc->vec1_P; 5240 to = pcbddc->coarse_vec; 5241 } 5242 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5243 if (smode == SCATTER_FORWARD) { 5244 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5245 Vec tvec; 5246 5247 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5248 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5249 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5250 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5251 } 5252 } else { 5253 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5254 ierr = VecResetArray(from);CHKERRQ(ierr); 5255 } 5256 } 5257 PetscFunctionReturn(0); 5258 } 5259 5260 /* uncomment for testing purposes */ 5261 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5262 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5263 { 5264 PetscErrorCode ierr; 5265 PC_IS* pcis = (PC_IS*)(pc->data); 5266 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5267 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5268 /* one and zero */ 5269 PetscScalar one=1.0,zero=0.0; 5270 /* space to store constraints and their local indices */ 5271 PetscScalar *constraints_data; 5272 PetscInt *constraints_idxs,*constraints_idxs_B; 5273 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5274 PetscInt *constraints_n; 5275 /* iterators */ 5276 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5277 /* BLAS integers */ 5278 PetscBLASInt lwork,lierr; 5279 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5280 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5281 /* reuse */ 5282 PetscInt olocal_primal_size,olocal_primal_size_cc; 5283 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5284 /* change of basis */ 5285 PetscBool qr_needed; 5286 PetscBT change_basis,qr_needed_idx; 5287 /* auxiliary stuff */ 5288 PetscInt *nnz,*is_indices; 5289 PetscInt ncc; 5290 /* some quantities */ 5291 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5292 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5293 5294 PetscFunctionBegin; 5295 /* Destroy Mat objects computed previously */ 5296 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5297 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5298 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5299 /* save info on constraints from previous setup (if any) */ 5300 olocal_primal_size = pcbddc->local_primal_size; 5301 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5302 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5303 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5304 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5305 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5306 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5307 5308 if (!pcbddc->adaptive_selection) { 5309 IS ISForVertices,*ISForFaces,*ISForEdges; 5310 MatNullSpace nearnullsp; 5311 const Vec *nearnullvecs; 5312 Vec *localnearnullsp; 5313 PetscScalar *array; 5314 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5315 PetscBool nnsp_has_cnst; 5316 /* LAPACK working arrays for SVD or POD */ 5317 PetscBool skip_lapack,boolforchange; 5318 PetscScalar *work; 5319 PetscReal *singular_vals; 5320 #if defined(PETSC_USE_COMPLEX) 5321 PetscReal *rwork; 5322 #endif 5323 #if defined(PETSC_MISSING_LAPACK_GESVD) 5324 PetscScalar *temp_basis,*correlation_mat; 5325 #else 5326 PetscBLASInt dummy_int=1; 5327 PetscScalar dummy_scalar=1.; 5328 #endif 5329 5330 /* Get index sets for faces, edges and vertices from graph */ 5331 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5332 /* print some info */ 5333 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5334 PetscInt nv; 5335 5336 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5337 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5338 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5339 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5340 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5341 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5342 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5343 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5344 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5345 } 5346 5347 /* free unneeded index sets */ 5348 if (!pcbddc->use_vertices) { 5349 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5350 } 5351 if (!pcbddc->use_edges) { 5352 for (i=0;i<n_ISForEdges;i++) { 5353 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5354 } 5355 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5356 n_ISForEdges = 0; 5357 } 5358 if (!pcbddc->use_faces) { 5359 for (i=0;i<n_ISForFaces;i++) { 5360 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5361 } 5362 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5363 n_ISForFaces = 0; 5364 } 5365 5366 /* check if near null space is attached to global mat */ 5367 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5368 if (nearnullsp) { 5369 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5370 /* remove any stored info */ 5371 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5372 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5373 /* store information for BDDC solver reuse */ 5374 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5375 pcbddc->onearnullspace = nearnullsp; 5376 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5377 for (i=0;i<nnsp_size;i++) { 5378 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5379 } 5380 } else { /* if near null space is not provided BDDC uses constants by default */ 5381 nnsp_size = 0; 5382 nnsp_has_cnst = PETSC_TRUE; 5383 } 5384 /* get max number of constraints on a single cc */ 5385 max_constraints = nnsp_size; 5386 if (nnsp_has_cnst) max_constraints++; 5387 5388 /* 5389 Evaluate maximum storage size needed by the procedure 5390 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5391 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5392 There can be multiple constraints per connected component 5393 */ 5394 n_vertices = 0; 5395 if (ISForVertices) { 5396 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5397 } 5398 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5399 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5400 5401 total_counts = n_ISForFaces+n_ISForEdges; 5402 total_counts *= max_constraints; 5403 total_counts += n_vertices; 5404 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5405 5406 total_counts = 0; 5407 max_size_of_constraint = 0; 5408 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5409 IS used_is; 5410 if (i<n_ISForEdges) { 5411 used_is = ISForEdges[i]; 5412 } else { 5413 used_is = ISForFaces[i-n_ISForEdges]; 5414 } 5415 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5416 total_counts += j; 5417 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5418 } 5419 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5420 5421 /* get local part of global near null space vectors */ 5422 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5423 for (k=0;k<nnsp_size;k++) { 5424 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5425 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5426 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5427 } 5428 5429 /* whether or not to skip lapack calls */ 5430 skip_lapack = PETSC_TRUE; 5431 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5432 5433 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5434 if (!skip_lapack) { 5435 PetscScalar temp_work; 5436 5437 #if defined(PETSC_MISSING_LAPACK_GESVD) 5438 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5439 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5440 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5441 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5442 #if defined(PETSC_USE_COMPLEX) 5443 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5444 #endif 5445 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5446 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5447 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5448 lwork = -1; 5449 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5450 #if !defined(PETSC_USE_COMPLEX) 5451 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5452 #else 5453 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5454 #endif 5455 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5456 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5457 #else /* on missing GESVD */ 5458 /* SVD */ 5459 PetscInt max_n,min_n; 5460 max_n = max_size_of_constraint; 5461 min_n = max_constraints; 5462 if (max_size_of_constraint < max_constraints) { 5463 min_n = max_size_of_constraint; 5464 max_n = max_constraints; 5465 } 5466 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5467 #if defined(PETSC_USE_COMPLEX) 5468 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5469 #endif 5470 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5471 lwork = -1; 5472 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5473 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5474 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5475 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5476 #if !defined(PETSC_USE_COMPLEX) 5477 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5478 #else 5479 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5480 #endif 5481 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5482 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5483 #endif /* on missing GESVD */ 5484 /* Allocate optimal workspace */ 5485 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5486 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5487 } 5488 /* Now we can loop on constraining sets */ 5489 total_counts = 0; 5490 constraints_idxs_ptr[0] = 0; 5491 constraints_data_ptr[0] = 0; 5492 /* vertices */ 5493 if (n_vertices) { 5494 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5495 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5496 for (i=0;i<n_vertices;i++) { 5497 constraints_n[total_counts] = 1; 5498 constraints_data[total_counts] = 1.0; 5499 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5500 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5501 total_counts++; 5502 } 5503 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5504 n_vertices = total_counts; 5505 } 5506 5507 /* edges and faces */ 5508 total_counts_cc = total_counts; 5509 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5510 IS used_is; 5511 PetscBool idxs_copied = PETSC_FALSE; 5512 5513 if (ncc<n_ISForEdges) { 5514 used_is = ISForEdges[ncc]; 5515 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5516 } else { 5517 used_is = ISForFaces[ncc-n_ISForEdges]; 5518 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5519 } 5520 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5521 5522 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5523 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5524 /* change of basis should not be performed on local periodic nodes */ 5525 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5526 if (nnsp_has_cnst) { 5527 PetscScalar quad_value; 5528 5529 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5530 idxs_copied = PETSC_TRUE; 5531 5532 if (!pcbddc->use_nnsp_true) { 5533 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5534 } else { 5535 quad_value = 1.0; 5536 } 5537 for (j=0;j<size_of_constraint;j++) { 5538 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5539 } 5540 temp_constraints++; 5541 total_counts++; 5542 } 5543 for (k=0;k<nnsp_size;k++) { 5544 PetscReal real_value; 5545 PetscScalar *ptr_to_data; 5546 5547 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5548 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5549 for (j=0;j<size_of_constraint;j++) { 5550 ptr_to_data[j] = array[is_indices[j]]; 5551 } 5552 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5553 /* check if array is null on the connected component */ 5554 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5555 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5556 if (real_value > 0.0) { /* keep indices and values */ 5557 temp_constraints++; 5558 total_counts++; 5559 if (!idxs_copied) { 5560 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5561 idxs_copied = PETSC_TRUE; 5562 } 5563 } 5564 } 5565 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5566 valid_constraints = temp_constraints; 5567 if (!pcbddc->use_nnsp_true && temp_constraints) { 5568 if (temp_constraints == 1) { /* just normalize the constraint */ 5569 PetscScalar norm,*ptr_to_data; 5570 5571 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5572 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5573 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5574 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5575 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5576 } else { /* perform SVD */ 5577 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5578 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5579 5580 #if defined(PETSC_MISSING_LAPACK_GESVD) 5581 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5582 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5583 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5584 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5585 from that computed using LAPACKgesvd 5586 -> This is due to a different computation of eigenvectors in LAPACKheev 5587 -> The quality of the POD-computed basis will be the same */ 5588 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5589 /* Store upper triangular part of correlation matrix */ 5590 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5591 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5592 for (j=0;j<temp_constraints;j++) { 5593 for (k=0;k<j+1;k++) { 5594 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5595 } 5596 } 5597 /* compute eigenvalues and eigenvectors of correlation matrix */ 5598 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5599 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5600 #if !defined(PETSC_USE_COMPLEX) 5601 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5602 #else 5603 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5604 #endif 5605 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5606 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5607 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5608 j = 0; 5609 while (j < temp_constraints && singular_vals[j] < tol) j++; 5610 total_counts = total_counts-j; 5611 valid_constraints = temp_constraints-j; 5612 /* scale and copy POD basis into used quadrature memory */ 5613 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5614 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5615 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5616 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5617 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5618 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5619 if (j<temp_constraints) { 5620 PetscInt ii; 5621 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5622 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5623 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5624 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5625 for (k=0;k<temp_constraints-j;k++) { 5626 for (ii=0;ii<size_of_constraint;ii++) { 5627 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5628 } 5629 } 5630 } 5631 #else /* on missing GESVD */ 5632 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5633 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5634 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5635 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5636 #if !defined(PETSC_USE_COMPLEX) 5637 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5638 #else 5639 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5640 #endif 5641 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5642 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5643 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5644 k = temp_constraints; 5645 if (k > size_of_constraint) k = size_of_constraint; 5646 j = 0; 5647 while (j < k && singular_vals[k-j-1] < tol) j++; 5648 valid_constraints = k-j; 5649 total_counts = total_counts-temp_constraints+valid_constraints; 5650 #endif /* on missing GESVD */ 5651 } 5652 } 5653 /* update pointers information */ 5654 if (valid_constraints) { 5655 constraints_n[total_counts_cc] = valid_constraints; 5656 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5657 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5658 /* set change_of_basis flag */ 5659 if (boolforchange) { 5660 PetscBTSet(change_basis,total_counts_cc); 5661 } 5662 total_counts_cc++; 5663 } 5664 } 5665 /* free workspace */ 5666 if (!skip_lapack) { 5667 ierr = PetscFree(work);CHKERRQ(ierr); 5668 #if defined(PETSC_USE_COMPLEX) 5669 ierr = PetscFree(rwork);CHKERRQ(ierr); 5670 #endif 5671 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5672 #if defined(PETSC_MISSING_LAPACK_GESVD) 5673 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5674 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5675 #endif 5676 } 5677 for (k=0;k<nnsp_size;k++) { 5678 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5679 } 5680 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5681 /* free index sets of faces, edges and vertices */ 5682 for (i=0;i<n_ISForFaces;i++) { 5683 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5684 } 5685 if (n_ISForFaces) { 5686 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5687 } 5688 for (i=0;i<n_ISForEdges;i++) { 5689 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5690 } 5691 if (n_ISForEdges) { 5692 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5693 } 5694 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5695 } else { 5696 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5697 5698 total_counts = 0; 5699 n_vertices = 0; 5700 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5701 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5702 } 5703 max_constraints = 0; 5704 total_counts_cc = 0; 5705 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5706 total_counts += pcbddc->adaptive_constraints_n[i]; 5707 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5708 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5709 } 5710 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5711 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5712 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5713 constraints_data = pcbddc->adaptive_constraints_data; 5714 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5715 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5716 total_counts_cc = 0; 5717 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5718 if (pcbddc->adaptive_constraints_n[i]) { 5719 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5720 } 5721 } 5722 #if 0 5723 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5724 for (i=0;i<total_counts_cc;i++) { 5725 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5726 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5727 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5728 printf(" %d",constraints_idxs[j]); 5729 } 5730 printf("\n"); 5731 printf("number of cc: %d\n",constraints_n[i]); 5732 } 5733 for (i=0;i<n_vertices;i++) { 5734 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5735 } 5736 for (i=0;i<sub_schurs->n_subs;i++) { 5737 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5738 } 5739 #endif 5740 5741 max_size_of_constraint = 0; 5742 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5743 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5744 /* Change of basis */ 5745 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5746 if (pcbddc->use_change_of_basis) { 5747 for (i=0;i<sub_schurs->n_subs;i++) { 5748 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5749 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5750 } 5751 } 5752 } 5753 } 5754 pcbddc->local_primal_size = total_counts; 5755 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5756 5757 /* map constraints_idxs in boundary numbering */ 5758 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5759 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5760 5761 /* Create constraint matrix */ 5762 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5763 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5764 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5765 5766 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5767 /* determine if a QR strategy is needed for change of basis */ 5768 qr_needed = PETSC_FALSE; 5769 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5770 total_primal_vertices=0; 5771 pcbddc->local_primal_size_cc = 0; 5772 for (i=0;i<total_counts_cc;i++) { 5773 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5774 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5775 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5776 pcbddc->local_primal_size_cc += 1; 5777 } else if (PetscBTLookup(change_basis,i)) { 5778 for (k=0;k<constraints_n[i];k++) { 5779 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5780 } 5781 pcbddc->local_primal_size_cc += constraints_n[i]; 5782 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5783 PetscBTSet(qr_needed_idx,i); 5784 qr_needed = PETSC_TRUE; 5785 } 5786 } else { 5787 pcbddc->local_primal_size_cc += 1; 5788 } 5789 } 5790 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5791 pcbddc->n_vertices = total_primal_vertices; 5792 /* permute indices in order to have a sorted set of vertices */ 5793 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5794 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5795 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5796 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5797 5798 /* nonzero structure of constraint matrix */ 5799 /* and get reference dof for local constraints */ 5800 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5801 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5802 5803 j = total_primal_vertices; 5804 total_counts = total_primal_vertices; 5805 cum = total_primal_vertices; 5806 for (i=n_vertices;i<total_counts_cc;i++) { 5807 if (!PetscBTLookup(change_basis,i)) { 5808 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5809 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5810 cum++; 5811 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5812 for (k=0;k<constraints_n[i];k++) { 5813 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5814 nnz[j+k] = size_of_constraint; 5815 } 5816 j += constraints_n[i]; 5817 } 5818 } 5819 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5820 ierr = PetscFree(nnz);CHKERRQ(ierr); 5821 5822 /* set values in constraint matrix */ 5823 for (i=0;i<total_primal_vertices;i++) { 5824 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5825 } 5826 total_counts = total_primal_vertices; 5827 for (i=n_vertices;i<total_counts_cc;i++) { 5828 if (!PetscBTLookup(change_basis,i)) { 5829 PetscInt *cols; 5830 5831 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5832 cols = constraints_idxs+constraints_idxs_ptr[i]; 5833 for (k=0;k<constraints_n[i];k++) { 5834 PetscInt row = total_counts+k; 5835 PetscScalar *vals; 5836 5837 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5838 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5839 } 5840 total_counts += constraints_n[i]; 5841 } 5842 } 5843 /* assembling */ 5844 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5845 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5846 5847 /* 5848 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5849 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5850 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5851 */ 5852 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5853 if (pcbddc->use_change_of_basis) { 5854 /* dual and primal dofs on a single cc */ 5855 PetscInt dual_dofs,primal_dofs; 5856 /* working stuff for GEQRF */ 5857 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5858 PetscBLASInt lqr_work; 5859 /* working stuff for UNGQR */ 5860 PetscScalar *gqr_work,lgqr_work_t; 5861 PetscBLASInt lgqr_work; 5862 /* working stuff for TRTRS */ 5863 PetscScalar *trs_rhs; 5864 PetscBLASInt Blas_NRHS; 5865 /* pointers for values insertion into change of basis matrix */ 5866 PetscInt *start_rows,*start_cols; 5867 PetscScalar *start_vals; 5868 /* working stuff for values insertion */ 5869 PetscBT is_primal; 5870 PetscInt *aux_primal_numbering_B; 5871 /* matrix sizes */ 5872 PetscInt global_size,local_size; 5873 /* temporary change of basis */ 5874 Mat localChangeOfBasisMatrix; 5875 /* extra space for debugging */ 5876 PetscScalar *dbg_work; 5877 5878 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5879 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5880 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5881 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5882 /* nonzeros for local mat */ 5883 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5884 if (!pcbddc->benign_change || pcbddc->fake_change) { 5885 for (i=0;i<pcis->n;i++) nnz[i]=1; 5886 } else { 5887 const PetscInt *ii; 5888 PetscInt n; 5889 PetscBool flg_row; 5890 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5891 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5892 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5893 } 5894 for (i=n_vertices;i<total_counts_cc;i++) { 5895 if (PetscBTLookup(change_basis,i)) { 5896 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5897 if (PetscBTLookup(qr_needed_idx,i)) { 5898 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5899 } else { 5900 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5901 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5902 } 5903 } 5904 } 5905 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5906 ierr = PetscFree(nnz);CHKERRQ(ierr); 5907 /* Set interior change in the matrix */ 5908 if (!pcbddc->benign_change || pcbddc->fake_change) { 5909 for (i=0;i<pcis->n;i++) { 5910 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5911 } 5912 } else { 5913 const PetscInt *ii,*jj; 5914 PetscScalar *aa; 5915 PetscInt n; 5916 PetscBool flg_row; 5917 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5918 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5919 for (i=0;i<n;i++) { 5920 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5921 } 5922 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5923 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5924 } 5925 5926 if (pcbddc->dbg_flag) { 5927 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5928 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5929 } 5930 5931 5932 /* Now we loop on the constraints which need a change of basis */ 5933 /* 5934 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5935 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5936 5937 Basic blocks of change of basis matrix T computed by 5938 5939 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5940 5941 | 1 0 ... 0 s_1/S | 5942 | 0 1 ... 0 s_2/S | 5943 | ... | 5944 | 0 ... 1 s_{n-1}/S | 5945 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5946 5947 with S = \sum_{i=1}^n s_i^2 5948 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5949 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5950 5951 - QR decomposition of constraints otherwise 5952 */ 5953 if (qr_needed) { 5954 /* space to store Q */ 5955 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5956 /* array to store scaling factors for reflectors */ 5957 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5958 /* first we issue queries for optimal work */ 5959 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5960 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5961 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5962 lqr_work = -1; 5963 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5964 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5965 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5966 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5967 lgqr_work = -1; 5968 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5969 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5970 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5971 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5972 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5973 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5974 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5975 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5976 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5977 /* array to store rhs and solution of triangular solver */ 5978 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5979 /* allocating workspace for check */ 5980 if (pcbddc->dbg_flag) { 5981 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5982 } 5983 } 5984 /* array to store whether a node is primal or not */ 5985 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5986 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5987 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5988 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5989 for (i=0;i<total_primal_vertices;i++) { 5990 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5991 } 5992 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5993 5994 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5995 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5996 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5997 if (PetscBTLookup(change_basis,total_counts)) { 5998 /* get constraint info */ 5999 primal_dofs = constraints_n[total_counts]; 6000 dual_dofs = size_of_constraint-primal_dofs; 6001 6002 if (pcbddc->dbg_flag) { 6003 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6004 } 6005 6006 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6007 6008 /* copy quadrature constraints for change of basis check */ 6009 if (pcbddc->dbg_flag) { 6010 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6011 } 6012 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6013 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6014 6015 /* compute QR decomposition of constraints */ 6016 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6017 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6018 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6019 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6020 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6021 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6022 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6023 6024 /* explictly compute R^-T */ 6025 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 6026 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6027 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6028 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6029 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6030 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6031 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6032 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6033 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6034 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6035 6036 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 6037 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6038 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6039 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6040 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6041 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6042 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6043 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 6044 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6045 6046 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6047 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6048 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6049 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6050 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6051 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6052 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6053 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6054 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6055 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6056 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6057 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6058 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6059 6060 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6061 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6062 /* insert cols for primal dofs */ 6063 for (j=0;j<primal_dofs;j++) { 6064 start_vals = &qr_basis[j*size_of_constraint]; 6065 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6066 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6067 } 6068 /* insert cols for dual dofs */ 6069 for (j=0,k=0;j<dual_dofs;k++) { 6070 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6071 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6072 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6073 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6074 j++; 6075 } 6076 } 6077 6078 /* check change of basis */ 6079 if (pcbddc->dbg_flag) { 6080 PetscInt ii,jj; 6081 PetscBool valid_qr=PETSC_TRUE; 6082 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6083 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6084 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6085 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6086 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6087 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6088 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6089 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6090 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6091 for (jj=0;jj<size_of_constraint;jj++) { 6092 for (ii=0;ii<primal_dofs;ii++) { 6093 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6094 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 6095 } 6096 } 6097 if (!valid_qr) { 6098 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6099 for (jj=0;jj<size_of_constraint;jj++) { 6100 for (ii=0;ii<primal_dofs;ii++) { 6101 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6102 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6103 } 6104 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 6105 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6106 } 6107 } 6108 } 6109 } else { 6110 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6111 } 6112 } 6113 } else { /* simple transformation block */ 6114 PetscInt row,col; 6115 PetscScalar val,norm; 6116 6117 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6118 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6119 for (j=0;j<size_of_constraint;j++) { 6120 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6121 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6122 if (!PetscBTLookup(is_primal,row_B)) { 6123 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6124 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6125 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6126 } else { 6127 for (k=0;k<size_of_constraint;k++) { 6128 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6129 if (row != col) { 6130 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6131 } else { 6132 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6133 } 6134 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6135 } 6136 } 6137 } 6138 if (pcbddc->dbg_flag) { 6139 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6140 } 6141 } 6142 } else { 6143 if (pcbddc->dbg_flag) { 6144 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6145 } 6146 } 6147 } 6148 6149 /* free workspace */ 6150 if (qr_needed) { 6151 if (pcbddc->dbg_flag) { 6152 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6153 } 6154 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6155 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6156 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6157 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6158 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6159 } 6160 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6161 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6162 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6163 6164 /* assembling of global change of variable */ 6165 if (!pcbddc->fake_change) { 6166 Mat tmat; 6167 PetscInt bs; 6168 6169 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6170 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6171 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6172 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6173 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6174 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6175 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6176 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6177 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6178 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6179 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6180 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6181 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6182 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6183 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6184 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6185 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6186 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6187 6188 /* check */ 6189 if (pcbddc->dbg_flag) { 6190 PetscReal error; 6191 Vec x,x_change; 6192 6193 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6194 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6195 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6196 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6197 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6198 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6199 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6200 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6201 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6202 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6203 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6204 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6205 if (error > PETSC_SMALL) { 6206 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6207 } 6208 ierr = VecDestroy(&x);CHKERRQ(ierr); 6209 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6210 } 6211 /* adapt sub_schurs computed (if any) */ 6212 if (pcbddc->use_deluxe_scaling) { 6213 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6214 6215 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 6216 if (sub_schurs && sub_schurs->S_Ej_all) { 6217 Mat S_new,tmat; 6218 IS is_all_N,is_V_Sall = NULL; 6219 6220 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6221 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6222 if (pcbddc->deluxe_zerorows) { 6223 ISLocalToGlobalMapping NtoSall; 6224 IS is_V; 6225 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6226 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6227 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6228 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6229 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6230 } 6231 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6232 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6233 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6234 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6235 if (pcbddc->deluxe_zerorows) { 6236 const PetscScalar *array; 6237 const PetscInt *idxs_V,*idxs_all; 6238 PetscInt i,n_V; 6239 6240 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6241 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6242 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6243 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6244 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6245 for (i=0;i<n_V;i++) { 6246 PetscScalar val; 6247 PetscInt idx; 6248 6249 idx = idxs_V[i]; 6250 val = array[idxs_all[idxs_V[i]]]; 6251 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6252 } 6253 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6254 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6255 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6256 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6257 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6258 } 6259 sub_schurs->S_Ej_all = S_new; 6260 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6261 if (sub_schurs->sum_S_Ej_all) { 6262 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6263 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6264 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6265 if (pcbddc->deluxe_zerorows) { 6266 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6267 } 6268 sub_schurs->sum_S_Ej_all = S_new; 6269 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6270 } 6271 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6272 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6273 } 6274 /* destroy any change of basis context in sub_schurs */ 6275 if (sub_schurs && sub_schurs->change) { 6276 PetscInt i; 6277 6278 for (i=0;i<sub_schurs->n_subs;i++) { 6279 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6280 } 6281 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6282 } 6283 } 6284 if (pcbddc->switch_static) { /* need to save the local change */ 6285 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6286 } else { 6287 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6288 } 6289 /* determine if any process has changed the pressures locally */ 6290 pcbddc->change_interior = pcbddc->benign_have_null; 6291 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6292 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6293 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6294 pcbddc->use_qr_single = qr_needed; 6295 } 6296 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6297 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6298 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6299 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6300 } else { 6301 Mat benign_global = NULL; 6302 if (pcbddc->benign_have_null) { 6303 Mat tmat; 6304 6305 pcbddc->change_interior = PETSC_TRUE; 6306 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6307 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6308 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6309 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6310 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6311 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6312 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6313 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6314 if (pcbddc->benign_change) { 6315 Mat M; 6316 6317 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6318 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6319 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6320 ierr = MatDestroy(&M);CHKERRQ(ierr); 6321 } else { 6322 Mat eye; 6323 PetscScalar *array; 6324 6325 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6326 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6327 for (i=0;i<pcis->n;i++) { 6328 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6329 } 6330 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6331 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6332 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6333 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6334 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6335 } 6336 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6337 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6338 } 6339 if (pcbddc->user_ChangeOfBasisMatrix) { 6340 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6341 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6342 } else if (pcbddc->benign_have_null) { 6343 pcbddc->ChangeOfBasisMatrix = benign_global; 6344 } 6345 } 6346 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6347 IS is_global; 6348 const PetscInt *gidxs; 6349 6350 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6351 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6352 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6353 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6354 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6355 } 6356 } 6357 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6358 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6359 } 6360 6361 if (!pcbddc->fake_change) { 6362 /* add pressure dofs to set of primal nodes for numbering purposes */ 6363 for (i=0;i<pcbddc->benign_n;i++) { 6364 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6365 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6366 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6367 pcbddc->local_primal_size_cc++; 6368 pcbddc->local_primal_size++; 6369 } 6370 6371 /* check if a new primal space has been introduced (also take into account benign trick) */ 6372 pcbddc->new_primal_space_local = PETSC_TRUE; 6373 if (olocal_primal_size == pcbddc->local_primal_size) { 6374 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6375 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6376 if (!pcbddc->new_primal_space_local) { 6377 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6378 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6379 } 6380 } 6381 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6382 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6383 } 6384 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6385 6386 /* flush dbg viewer */ 6387 if (pcbddc->dbg_flag) { 6388 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6389 } 6390 6391 /* free workspace */ 6392 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6393 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6394 if (!pcbddc->adaptive_selection) { 6395 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6396 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6397 } else { 6398 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6399 pcbddc->adaptive_constraints_idxs_ptr, 6400 pcbddc->adaptive_constraints_data_ptr, 6401 pcbddc->adaptive_constraints_idxs, 6402 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6403 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6404 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6405 } 6406 PetscFunctionReturn(0); 6407 } 6408 6409 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6410 { 6411 ISLocalToGlobalMapping map; 6412 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6413 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6414 PetscInt i,N; 6415 PetscBool rcsr = PETSC_FALSE; 6416 PetscErrorCode ierr; 6417 6418 PetscFunctionBegin; 6419 if (pcbddc->recompute_topography) { 6420 pcbddc->graphanalyzed = PETSC_FALSE; 6421 /* Reset previously computed graph */ 6422 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6423 /* Init local Graph struct */ 6424 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6425 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6426 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6427 6428 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6429 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6430 } 6431 /* Check validity of the csr graph passed in by the user */ 6432 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6433 6434 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6435 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6436 PetscInt *xadj,*adjncy; 6437 PetscInt nvtxs; 6438 PetscBool flg_row=PETSC_FALSE; 6439 6440 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6441 if (flg_row) { 6442 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6443 pcbddc->computed_rowadj = PETSC_TRUE; 6444 } 6445 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6446 rcsr = PETSC_TRUE; 6447 } 6448 if (pcbddc->dbg_flag) { 6449 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6450 } 6451 6452 /* Setup of Graph */ 6453 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6454 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6455 6456 /* attach info on disconnected subdomains if present */ 6457 if (pcbddc->n_local_subs) { 6458 PetscInt *local_subs; 6459 6460 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6461 for (i=0;i<pcbddc->n_local_subs;i++) { 6462 const PetscInt *idxs; 6463 PetscInt nl,j; 6464 6465 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6466 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6467 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6468 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6469 } 6470 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6471 pcbddc->mat_graph->local_subs = local_subs; 6472 } 6473 } 6474 6475 if (!pcbddc->graphanalyzed) { 6476 /* Graph's connected components analysis */ 6477 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6478 pcbddc->graphanalyzed = PETSC_TRUE; 6479 } 6480 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6481 PetscFunctionReturn(0); 6482 } 6483 6484 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6485 { 6486 PetscInt i,j; 6487 PetscScalar *alphas; 6488 PetscErrorCode ierr; 6489 6490 PetscFunctionBegin; 6491 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6492 for (i=0;i<n;i++) { 6493 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6494 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6495 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6496 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6497 } 6498 ierr = PetscFree(alphas);CHKERRQ(ierr); 6499 PetscFunctionReturn(0); 6500 } 6501 6502 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6503 { 6504 Mat A; 6505 PetscInt n_neighs,*neighs,*n_shared,**shared; 6506 PetscMPIInt size,rank,color; 6507 PetscInt *xadj,*adjncy; 6508 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6509 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 6510 PetscInt void_procs,*procs_candidates = NULL; 6511 PetscInt xadj_count,*count; 6512 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6513 PetscSubcomm psubcomm; 6514 MPI_Comm subcomm; 6515 PetscErrorCode ierr; 6516 6517 PetscFunctionBegin; 6518 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6519 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6520 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 6521 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6522 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6523 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6524 6525 if (have_void) *have_void = PETSC_FALSE; 6526 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6527 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6528 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6529 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6530 im_active = !!n; 6531 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6532 void_procs = size - active_procs; 6533 /* get ranks of of non-active processes in mat communicator */ 6534 if (void_procs) { 6535 PetscInt ncand; 6536 6537 if (have_void) *have_void = PETSC_TRUE; 6538 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6539 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6540 for (i=0,ncand=0;i<size;i++) { 6541 if (!procs_candidates[i]) { 6542 procs_candidates[ncand++] = i; 6543 } 6544 } 6545 /* force n_subdomains to be not greater that the number of non-active processes */ 6546 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6547 } 6548 6549 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 6550 number of subdomains requested 1 -> send to master or first candidate in voids */ 6551 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 6552 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 6553 PetscInt issize,isidx,dest; 6554 if (*n_subdomains == 1) dest = 0; 6555 else dest = rank; 6556 if (im_active) { 6557 issize = 1; 6558 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6559 isidx = procs_candidates[dest]; 6560 } else { 6561 isidx = dest; 6562 } 6563 } else { 6564 issize = 0; 6565 isidx = -1; 6566 } 6567 if (*n_subdomains != 1) *n_subdomains = active_procs; 6568 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6569 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6570 PetscFunctionReturn(0); 6571 } 6572 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6573 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6574 threshold = PetscMax(threshold,2); 6575 6576 /* Get info on mapping */ 6577 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6578 6579 /* build local CSR graph of subdomains' connectivity */ 6580 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6581 xadj[0] = 0; 6582 xadj[1] = PetscMax(n_neighs-1,0); 6583 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6584 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6585 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 6586 for (i=1;i<n_neighs;i++) 6587 for (j=0;j<n_shared[i];j++) 6588 count[shared[i][j]] += 1; 6589 6590 xadj_count = 0; 6591 for (i=1;i<n_neighs;i++) { 6592 for (j=0;j<n_shared[i];j++) { 6593 if (count[shared[i][j]] < threshold) { 6594 adjncy[xadj_count] = neighs[i]; 6595 adjncy_wgt[xadj_count] = n_shared[i]; 6596 xadj_count++; 6597 break; 6598 } 6599 } 6600 } 6601 xadj[1] = xadj_count; 6602 ierr = PetscFree(count);CHKERRQ(ierr); 6603 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6604 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6605 6606 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6607 6608 /* Restrict work on active processes only */ 6609 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6610 if (void_procs) { 6611 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6612 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6613 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6614 subcomm = PetscSubcommChild(psubcomm); 6615 } else { 6616 psubcomm = NULL; 6617 subcomm = PetscObjectComm((PetscObject)mat); 6618 } 6619 6620 v_wgt = NULL; 6621 if (!color) { 6622 ierr = PetscFree(xadj);CHKERRQ(ierr); 6623 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6624 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6625 } else { 6626 Mat subdomain_adj; 6627 IS new_ranks,new_ranks_contig; 6628 MatPartitioning partitioner; 6629 PetscInt rstart=0,rend=0; 6630 PetscInt *is_indices,*oldranks; 6631 PetscMPIInt size; 6632 PetscBool aggregate; 6633 6634 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6635 if (void_procs) { 6636 PetscInt prank = rank; 6637 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6638 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6639 for (i=0;i<xadj[1];i++) { 6640 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6641 } 6642 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6643 } else { 6644 oldranks = NULL; 6645 } 6646 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6647 if (aggregate) { /* TODO: all this part could be made more efficient */ 6648 PetscInt lrows,row,ncols,*cols; 6649 PetscMPIInt nrank; 6650 PetscScalar *vals; 6651 6652 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6653 lrows = 0; 6654 if (nrank<redprocs) { 6655 lrows = size/redprocs; 6656 if (nrank<size%redprocs) lrows++; 6657 } 6658 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6659 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6660 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6661 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6662 row = nrank; 6663 ncols = xadj[1]-xadj[0]; 6664 cols = adjncy; 6665 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6666 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6667 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6668 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6669 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6670 ierr = PetscFree(xadj);CHKERRQ(ierr); 6671 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6672 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6673 ierr = PetscFree(vals);CHKERRQ(ierr); 6674 if (use_vwgt) { 6675 Vec v; 6676 const PetscScalar *array; 6677 PetscInt nl; 6678 6679 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6680 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 6681 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6682 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6683 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6684 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6685 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6686 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6687 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6688 ierr = VecDestroy(&v);CHKERRQ(ierr); 6689 } 6690 } else { 6691 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6692 if (use_vwgt) { 6693 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6694 v_wgt[0] = n; 6695 } 6696 } 6697 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6698 6699 /* Partition */ 6700 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6701 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6702 if (v_wgt) { 6703 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6704 } 6705 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6706 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6707 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6708 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6709 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6710 6711 /* renumber new_ranks to avoid "holes" in new set of processors */ 6712 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6713 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6714 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6715 if (!aggregate) { 6716 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6717 #if defined(PETSC_USE_DEBUG) 6718 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6719 #endif 6720 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6721 } else if (oldranks) { 6722 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6723 } else { 6724 ranks_send_to_idx[0] = is_indices[0]; 6725 } 6726 } else { 6727 PetscInt idxs[1]; 6728 PetscMPIInt tag; 6729 MPI_Request *reqs; 6730 6731 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6732 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6733 for (i=rstart;i<rend;i++) { 6734 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6735 } 6736 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6737 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6738 ierr = PetscFree(reqs);CHKERRQ(ierr); 6739 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6740 #if defined(PETSC_USE_DEBUG) 6741 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6742 #endif 6743 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6744 } else if (oldranks) { 6745 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6746 } else { 6747 ranks_send_to_idx[0] = idxs[0]; 6748 } 6749 } 6750 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6751 /* clean up */ 6752 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6753 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6754 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6755 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6756 } 6757 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6758 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6759 6760 /* assemble parallel IS for sends */ 6761 i = 1; 6762 if (!color) i=0; 6763 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6764 PetscFunctionReturn(0); 6765 } 6766 6767 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6768 6769 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6770 { 6771 Mat local_mat; 6772 IS is_sends_internal; 6773 PetscInt rows,cols,new_local_rows; 6774 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6775 PetscBool ismatis,isdense,newisdense,destroy_mat; 6776 ISLocalToGlobalMapping l2gmap; 6777 PetscInt* l2gmap_indices; 6778 const PetscInt* is_indices; 6779 MatType new_local_type; 6780 /* buffers */ 6781 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6782 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6783 PetscInt *recv_buffer_idxs_local; 6784 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6785 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6786 /* MPI */ 6787 MPI_Comm comm,comm_n; 6788 PetscSubcomm subcomm; 6789 PetscMPIInt n_sends,n_recvs,commsize; 6790 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6791 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6792 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6793 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6794 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6795 PetscErrorCode ierr; 6796 6797 PetscFunctionBegin; 6798 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6799 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6800 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 6801 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6802 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6803 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6804 PetscValidLogicalCollectiveBool(mat,reuse,6); 6805 PetscValidLogicalCollectiveInt(mat,nis,8); 6806 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6807 if (nvecs) { 6808 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6809 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6810 } 6811 /* further checks */ 6812 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6813 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6814 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6815 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6816 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6817 if (reuse && *mat_n) { 6818 PetscInt mrows,mcols,mnrows,mncols; 6819 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6820 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6821 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6822 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6823 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6824 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6825 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6826 } 6827 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6828 PetscValidLogicalCollectiveInt(mat,bs,0); 6829 6830 /* prepare IS for sending if not provided */ 6831 if (!is_sends) { 6832 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6833 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6834 } else { 6835 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6836 is_sends_internal = is_sends; 6837 } 6838 6839 /* get comm */ 6840 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6841 6842 /* compute number of sends */ 6843 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6844 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6845 6846 /* compute number of receives */ 6847 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6848 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6849 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6850 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6851 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6852 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6853 ierr = PetscFree(iflags);CHKERRQ(ierr); 6854 6855 /* restrict comm if requested */ 6856 subcomm = 0; 6857 destroy_mat = PETSC_FALSE; 6858 if (restrict_comm) { 6859 PetscMPIInt color,subcommsize; 6860 6861 color = 0; 6862 if (restrict_full) { 6863 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6864 } else { 6865 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6866 } 6867 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6868 subcommsize = commsize - subcommsize; 6869 /* check if reuse has been requested */ 6870 if (reuse) { 6871 if (*mat_n) { 6872 PetscMPIInt subcommsize2; 6873 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6874 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6875 comm_n = PetscObjectComm((PetscObject)*mat_n); 6876 } else { 6877 comm_n = PETSC_COMM_SELF; 6878 } 6879 } else { /* MAT_INITIAL_MATRIX */ 6880 PetscMPIInt rank; 6881 6882 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6883 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6884 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6885 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6886 comm_n = PetscSubcommChild(subcomm); 6887 } 6888 /* flag to destroy *mat_n if not significative */ 6889 if (color) destroy_mat = PETSC_TRUE; 6890 } else { 6891 comm_n = comm; 6892 } 6893 6894 /* prepare send/receive buffers */ 6895 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6896 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6897 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6898 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6899 if (nis) { 6900 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6901 } 6902 6903 /* Get data from local matrices */ 6904 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6905 /* TODO: See below some guidelines on how to prepare the local buffers */ 6906 /* 6907 send_buffer_vals should contain the raw values of the local matrix 6908 send_buffer_idxs should contain: 6909 - MatType_PRIVATE type 6910 - PetscInt size_of_l2gmap 6911 - PetscInt global_row_indices[size_of_l2gmap] 6912 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6913 */ 6914 else { 6915 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6916 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6917 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6918 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6919 send_buffer_idxs[1] = i; 6920 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6921 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6922 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6923 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6924 for (i=0;i<n_sends;i++) { 6925 ilengths_vals[is_indices[i]] = len*len; 6926 ilengths_idxs[is_indices[i]] = len+2; 6927 } 6928 } 6929 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6930 /* additional is (if any) */ 6931 if (nis) { 6932 PetscMPIInt psum; 6933 PetscInt j; 6934 for (j=0,psum=0;j<nis;j++) { 6935 PetscInt plen; 6936 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6937 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6938 psum += len+1; /* indices + lenght */ 6939 } 6940 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6941 for (j=0,psum=0;j<nis;j++) { 6942 PetscInt plen; 6943 const PetscInt *is_array_idxs; 6944 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6945 send_buffer_idxs_is[psum] = plen; 6946 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6947 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6948 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6949 psum += plen+1; /* indices + lenght */ 6950 } 6951 for (i=0;i<n_sends;i++) { 6952 ilengths_idxs_is[is_indices[i]] = psum; 6953 } 6954 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6955 } 6956 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 6957 6958 buf_size_idxs = 0; 6959 buf_size_vals = 0; 6960 buf_size_idxs_is = 0; 6961 buf_size_vecs = 0; 6962 for (i=0;i<n_recvs;i++) { 6963 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6964 buf_size_vals += (PetscInt)olengths_vals[i]; 6965 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6966 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6967 } 6968 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6969 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6970 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6971 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6972 6973 /* get new tags for clean communications */ 6974 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6975 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6976 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6977 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6978 6979 /* allocate for requests */ 6980 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6981 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6982 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6983 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6984 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6985 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6986 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6987 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6988 6989 /* communications */ 6990 ptr_idxs = recv_buffer_idxs; 6991 ptr_vals = recv_buffer_vals; 6992 ptr_idxs_is = recv_buffer_idxs_is; 6993 ptr_vecs = recv_buffer_vecs; 6994 for (i=0;i<n_recvs;i++) { 6995 source_dest = onodes[i]; 6996 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6997 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6998 ptr_idxs += olengths_idxs[i]; 6999 ptr_vals += olengths_vals[i]; 7000 if (nis) { 7001 source_dest = onodes_is[i]; 7002 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 7003 ptr_idxs_is += olengths_idxs_is[i]; 7004 } 7005 if (nvecs) { 7006 source_dest = onodes[i]; 7007 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 7008 ptr_vecs += olengths_idxs[i]-2; 7009 } 7010 } 7011 for (i=0;i<n_sends;i++) { 7012 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7013 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 7014 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 7015 if (nis) { 7016 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 7017 } 7018 if (nvecs) { 7019 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7020 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 7021 } 7022 } 7023 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7024 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7025 7026 /* assemble new l2g map */ 7027 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7028 ptr_idxs = recv_buffer_idxs; 7029 new_local_rows = 0; 7030 for (i=0;i<n_recvs;i++) { 7031 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7032 ptr_idxs += olengths_idxs[i]; 7033 } 7034 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7035 ptr_idxs = recv_buffer_idxs; 7036 new_local_rows = 0; 7037 for (i=0;i<n_recvs;i++) { 7038 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 7039 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7040 ptr_idxs += olengths_idxs[i]; 7041 } 7042 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7043 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7044 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7045 7046 /* infer new local matrix type from received local matrices type */ 7047 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7048 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7049 if (n_recvs) { 7050 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7051 ptr_idxs = recv_buffer_idxs; 7052 for (i=0;i<n_recvs;i++) { 7053 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7054 new_local_type_private = MATAIJ_PRIVATE; 7055 break; 7056 } 7057 ptr_idxs += olengths_idxs[i]; 7058 } 7059 switch (new_local_type_private) { 7060 case MATDENSE_PRIVATE: 7061 new_local_type = MATSEQAIJ; 7062 bs = 1; 7063 break; 7064 case MATAIJ_PRIVATE: 7065 new_local_type = MATSEQAIJ; 7066 bs = 1; 7067 break; 7068 case MATBAIJ_PRIVATE: 7069 new_local_type = MATSEQBAIJ; 7070 break; 7071 case MATSBAIJ_PRIVATE: 7072 new_local_type = MATSEQSBAIJ; 7073 break; 7074 default: 7075 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7076 break; 7077 } 7078 } else { /* by default, new_local_type is seqaij */ 7079 new_local_type = MATSEQAIJ; 7080 bs = 1; 7081 } 7082 7083 /* create MATIS object if needed */ 7084 if (!reuse) { 7085 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7086 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7087 } else { 7088 /* it also destroys the local matrices */ 7089 if (*mat_n) { 7090 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7091 } else { /* this is a fake object */ 7092 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7093 } 7094 } 7095 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7096 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7097 7098 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7099 7100 /* Global to local map of received indices */ 7101 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7102 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7103 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7104 7105 /* restore attributes -> type of incoming data and its size */ 7106 buf_size_idxs = 0; 7107 for (i=0;i<n_recvs;i++) { 7108 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7109 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7110 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7111 } 7112 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7113 7114 /* set preallocation */ 7115 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7116 if (!newisdense) { 7117 PetscInt *new_local_nnz=0; 7118 7119 ptr_idxs = recv_buffer_idxs_local; 7120 if (n_recvs) { 7121 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7122 } 7123 for (i=0;i<n_recvs;i++) { 7124 PetscInt j; 7125 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7126 for (j=0;j<*(ptr_idxs+1);j++) { 7127 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7128 } 7129 } else { 7130 /* TODO */ 7131 } 7132 ptr_idxs += olengths_idxs[i]; 7133 } 7134 if (new_local_nnz) { 7135 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7136 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7137 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7138 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7139 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7140 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7141 } else { 7142 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7143 } 7144 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7145 } else { 7146 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7147 } 7148 7149 /* set values */ 7150 ptr_vals = recv_buffer_vals; 7151 ptr_idxs = recv_buffer_idxs_local; 7152 for (i=0;i<n_recvs;i++) { 7153 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7154 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7155 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7156 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7157 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7158 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7159 } else { 7160 /* TODO */ 7161 } 7162 ptr_idxs += olengths_idxs[i]; 7163 ptr_vals += olengths_vals[i]; 7164 } 7165 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7166 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7167 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7168 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7169 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7170 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7171 7172 #if 0 7173 if (!restrict_comm) { /* check */ 7174 Vec lvec,rvec; 7175 PetscReal infty_error; 7176 7177 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7178 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7179 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7180 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7181 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7182 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7183 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7184 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7185 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7186 } 7187 #endif 7188 7189 /* assemble new additional is (if any) */ 7190 if (nis) { 7191 PetscInt **temp_idxs,*count_is,j,psum; 7192 7193 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7194 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7195 ptr_idxs = recv_buffer_idxs_is; 7196 psum = 0; 7197 for (i=0;i<n_recvs;i++) { 7198 for (j=0;j<nis;j++) { 7199 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7200 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7201 psum += plen; 7202 ptr_idxs += plen+1; /* shift pointer to received data */ 7203 } 7204 } 7205 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7206 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7207 for (i=1;i<nis;i++) { 7208 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7209 } 7210 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7211 ptr_idxs = recv_buffer_idxs_is; 7212 for (i=0;i<n_recvs;i++) { 7213 for (j=0;j<nis;j++) { 7214 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7215 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7216 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7217 ptr_idxs += plen+1; /* shift pointer to received data */ 7218 } 7219 } 7220 for (i=0;i<nis;i++) { 7221 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7222 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7223 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7224 } 7225 ierr = PetscFree(count_is);CHKERRQ(ierr); 7226 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7227 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7228 } 7229 /* free workspace */ 7230 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7231 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7232 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7233 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7234 if (isdense) { 7235 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7236 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7237 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7238 } else { 7239 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7240 } 7241 if (nis) { 7242 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7243 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7244 } 7245 7246 if (nvecs) { 7247 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7248 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7249 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7250 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7251 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7252 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7253 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7254 /* set values */ 7255 ptr_vals = recv_buffer_vecs; 7256 ptr_idxs = recv_buffer_idxs_local; 7257 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7258 for (i=0;i<n_recvs;i++) { 7259 PetscInt j; 7260 for (j=0;j<*(ptr_idxs+1);j++) { 7261 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7262 } 7263 ptr_idxs += olengths_idxs[i]; 7264 ptr_vals += olengths_idxs[i]-2; 7265 } 7266 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7267 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7268 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7269 } 7270 7271 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7272 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7273 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7274 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7275 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7276 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7277 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7278 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7279 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7280 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7281 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7282 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7283 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7284 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7285 ierr = PetscFree(onodes);CHKERRQ(ierr); 7286 if (nis) { 7287 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7288 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7289 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7290 } 7291 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7292 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7293 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7294 for (i=0;i<nis;i++) { 7295 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7296 } 7297 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7298 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7299 } 7300 *mat_n = NULL; 7301 } 7302 PetscFunctionReturn(0); 7303 } 7304 7305 /* temporary hack into ksp private data structure */ 7306 #include <petsc/private/kspimpl.h> 7307 7308 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7309 { 7310 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7311 PC_IS *pcis = (PC_IS*)pc->data; 7312 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7313 Mat coarsedivudotp = NULL; 7314 Mat coarseG,t_coarse_mat_is; 7315 MatNullSpace CoarseNullSpace = NULL; 7316 ISLocalToGlobalMapping coarse_islg; 7317 IS coarse_is,*isarray; 7318 PetscInt i,im_active=-1,active_procs=-1; 7319 PetscInt nis,nisdofs,nisneu,nisvert; 7320 PC pc_temp; 7321 PCType coarse_pc_type; 7322 KSPType coarse_ksp_type; 7323 PetscBool multilevel_requested,multilevel_allowed; 7324 PetscBool isredundant,isbddc,isnn,coarse_reuse; 7325 PetscInt ncoarse,nedcfield; 7326 PetscBool compute_vecs = PETSC_FALSE; 7327 PetscScalar *array; 7328 MatReuse coarse_mat_reuse; 7329 PetscBool restr, full_restr, have_void; 7330 PetscMPIInt commsize; 7331 PetscErrorCode ierr; 7332 7333 PetscFunctionBegin; 7334 /* Assign global numbering to coarse dofs */ 7335 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7336 PetscInt ocoarse_size; 7337 compute_vecs = PETSC_TRUE; 7338 7339 pcbddc->new_primal_space = PETSC_TRUE; 7340 ocoarse_size = pcbddc->coarse_size; 7341 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7342 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7343 /* see if we can avoid some work */ 7344 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7345 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7346 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7347 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7348 coarse_reuse = PETSC_FALSE; 7349 } else { /* we can safely reuse already computed coarse matrix */ 7350 coarse_reuse = PETSC_TRUE; 7351 } 7352 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7353 coarse_reuse = PETSC_FALSE; 7354 } 7355 /* reset any subassembling information */ 7356 if (!coarse_reuse || pcbddc->recompute_topography) { 7357 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7358 } 7359 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7360 coarse_reuse = PETSC_TRUE; 7361 } 7362 /* assemble coarse matrix */ 7363 if (coarse_reuse && pcbddc->coarse_ksp) { 7364 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7365 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7366 coarse_mat_reuse = MAT_REUSE_MATRIX; 7367 } else { 7368 coarse_mat = NULL; 7369 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7370 } 7371 7372 /* creates temporary l2gmap and IS for coarse indexes */ 7373 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7374 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7375 7376 /* creates temporary MATIS object for coarse matrix */ 7377 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7378 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7379 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7380 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7381 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7382 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7383 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7384 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7385 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7386 7387 /* count "active" (i.e. with positive local size) and "void" processes */ 7388 im_active = !!(pcis->n); 7389 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7390 7391 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7392 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7393 /* full_restr : just use the receivers from the subassembling pattern */ 7394 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);CHKERRQ(ierr); 7395 coarse_mat_is = NULL; 7396 multilevel_allowed = PETSC_FALSE; 7397 multilevel_requested = PETSC_FALSE; 7398 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7399 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7400 if (multilevel_requested) { 7401 ncoarse = active_procs/pcbddc->coarsening_ratio; 7402 restr = PETSC_FALSE; 7403 full_restr = PETSC_FALSE; 7404 } else { 7405 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7406 restr = PETSC_TRUE; 7407 full_restr = PETSC_TRUE; 7408 } 7409 if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7410 ncoarse = PetscMax(1,ncoarse); 7411 if (!pcbddc->coarse_subassembling) { 7412 if (pcbddc->coarsening_ratio > 1) { 7413 if (multilevel_requested) { 7414 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7415 } else { 7416 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7417 } 7418 } else { 7419 PetscMPIInt rank; 7420 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7421 have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE; 7422 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7423 } 7424 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7425 PetscInt psum; 7426 if (pcbddc->coarse_ksp) psum = 1; 7427 else psum = 0; 7428 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7429 if (ncoarse < commsize) have_void = PETSC_TRUE; 7430 } 7431 /* determine if we can go multilevel */ 7432 if (multilevel_requested) { 7433 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7434 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7435 } 7436 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7437 7438 /* dump subassembling pattern */ 7439 if (pcbddc->dbg_flag && multilevel_allowed) { 7440 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7441 } 7442 7443 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7444 nedcfield = -1; 7445 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7446 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7447 const PetscInt *idxs; 7448 ISLocalToGlobalMapping tmap; 7449 7450 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7451 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7452 /* allocate space for temporary storage */ 7453 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7454 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7455 /* allocate for IS array */ 7456 nisdofs = pcbddc->n_ISForDofsLocal; 7457 if (pcbddc->nedclocal) { 7458 if (pcbddc->nedfield > -1) { 7459 nedcfield = pcbddc->nedfield; 7460 } else { 7461 nedcfield = 0; 7462 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7463 nisdofs = 1; 7464 } 7465 } 7466 nisneu = !!pcbddc->NeumannBoundariesLocal; 7467 nisvert = 0; /* nisvert is not used */ 7468 nis = nisdofs + nisneu + nisvert; 7469 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7470 /* dofs splitting */ 7471 for (i=0;i<nisdofs;i++) { 7472 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7473 if (nedcfield != i) { 7474 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7475 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7476 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7477 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7478 } else { 7479 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7480 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7481 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7482 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7483 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7484 } 7485 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7486 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7487 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7488 } 7489 /* neumann boundaries */ 7490 if (pcbddc->NeumannBoundariesLocal) { 7491 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7492 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7493 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7494 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7495 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7496 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7497 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7498 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7499 } 7500 /* free memory */ 7501 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7502 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7503 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7504 } else { 7505 nis = 0; 7506 nisdofs = 0; 7507 nisneu = 0; 7508 nisvert = 0; 7509 isarray = NULL; 7510 } 7511 /* destroy no longer needed map */ 7512 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7513 7514 /* subassemble */ 7515 if (multilevel_allowed) { 7516 Vec vp[1]; 7517 PetscInt nvecs = 0; 7518 PetscBool reuse,reuser; 7519 7520 if (coarse_mat) reuse = PETSC_TRUE; 7521 else reuse = PETSC_FALSE; 7522 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7523 vp[0] = NULL; 7524 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7525 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7526 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7527 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7528 nvecs = 1; 7529 7530 if (pcbddc->divudotp) { 7531 Mat B,loc_divudotp; 7532 Vec v,p; 7533 IS dummy; 7534 PetscInt np; 7535 7536 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7537 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7538 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7539 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7540 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7541 ierr = VecSet(p,1.);CHKERRQ(ierr); 7542 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7543 ierr = VecDestroy(&p);CHKERRQ(ierr); 7544 ierr = MatDestroy(&B);CHKERRQ(ierr); 7545 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7546 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7547 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7548 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7549 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7550 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7551 ierr = VecDestroy(&v);CHKERRQ(ierr); 7552 } 7553 } 7554 if (reuser) { 7555 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7556 } else { 7557 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7558 } 7559 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7560 PetscScalar *arraym,*arrayv; 7561 PetscInt nl; 7562 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7563 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7564 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7565 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7566 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7567 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7568 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7569 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7570 } else { 7571 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7572 } 7573 } else { 7574 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7575 } 7576 if (coarse_mat_is || coarse_mat) { 7577 PetscMPIInt size; 7578 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7579 if (!multilevel_allowed) { 7580 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7581 } else { 7582 Mat A; 7583 7584 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7585 if (coarse_mat_is) { 7586 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7587 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7588 coarse_mat = coarse_mat_is; 7589 } 7590 /* be sure we don't have MatSeqDENSE as local mat */ 7591 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7592 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7593 } 7594 } 7595 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7596 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7597 7598 /* create local to global scatters for coarse problem */ 7599 if (compute_vecs) { 7600 PetscInt lrows; 7601 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7602 if (coarse_mat) { 7603 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7604 } else { 7605 lrows = 0; 7606 } 7607 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7608 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7609 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7610 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7611 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7612 } 7613 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7614 7615 /* set defaults for coarse KSP and PC */ 7616 if (multilevel_allowed) { 7617 coarse_ksp_type = KSPRICHARDSON; 7618 coarse_pc_type = PCBDDC; 7619 } else { 7620 coarse_ksp_type = KSPPREONLY; 7621 coarse_pc_type = PCREDUNDANT; 7622 } 7623 7624 /* print some info if requested */ 7625 if (pcbddc->dbg_flag) { 7626 if (!multilevel_allowed) { 7627 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7628 if (multilevel_requested) { 7629 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7630 } else if (pcbddc->max_levels) { 7631 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7632 } 7633 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7634 } 7635 } 7636 7637 /* communicate coarse discrete gradient */ 7638 coarseG = NULL; 7639 if (pcbddc->nedcG && multilevel_allowed) { 7640 MPI_Comm ccomm; 7641 if (coarse_mat) { 7642 ccomm = PetscObjectComm((PetscObject)coarse_mat); 7643 } else { 7644 ccomm = MPI_COMM_NULL; 7645 } 7646 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 7647 } 7648 7649 /* create the coarse KSP object only once with defaults */ 7650 if (coarse_mat) { 7651 PetscViewer dbg_viewer = NULL; 7652 if (pcbddc->dbg_flag) { 7653 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7654 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7655 } 7656 if (!pcbddc->coarse_ksp) { 7657 char prefix[256],str_level[16]; 7658 size_t len; 7659 7660 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7661 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7662 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7663 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7664 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7665 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7666 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7667 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7668 /* TODO is this logic correct? should check for coarse_mat type */ 7669 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7670 /* prefix */ 7671 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7672 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7673 if (!pcbddc->current_level) { 7674 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7675 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7676 } else { 7677 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7678 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7679 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7680 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7681 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7682 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7683 } 7684 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7685 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7686 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7687 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7688 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7689 /* allow user customization */ 7690 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7691 } 7692 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7693 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7694 if (nisdofs) { 7695 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7696 for (i=0;i<nisdofs;i++) { 7697 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7698 } 7699 } 7700 if (nisneu) { 7701 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7702 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7703 } 7704 if (nisvert) { 7705 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7706 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7707 } 7708 if (coarseG) { 7709 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 7710 } 7711 7712 /* get some info after set from options */ 7713 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7714 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7715 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7716 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 7717 if (isbddc && !multilevel_allowed) { 7718 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7719 isbddc = PETSC_FALSE; 7720 } 7721 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 7722 if (multilevel_requested && !isbddc && !isnn) { 7723 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 7724 isbddc = PETSC_TRUE; 7725 isnn = PETSC_FALSE; 7726 } 7727 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7728 if (isredundant) { 7729 KSP inner_ksp; 7730 PC inner_pc; 7731 7732 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7733 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7734 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7735 } 7736 7737 /* parameters which miss an API */ 7738 if (isbddc) { 7739 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7740 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7741 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7742 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7743 if (pcbddc_coarse->benign_saddle_point) { 7744 Mat coarsedivudotp_is; 7745 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7746 IS row,col; 7747 const PetscInt *gidxs; 7748 PetscInt n,st,M,N; 7749 7750 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7751 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7752 st = st-n; 7753 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7754 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7755 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7756 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7757 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7758 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7759 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7760 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7761 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7762 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7763 ierr = ISDestroy(&row);CHKERRQ(ierr); 7764 ierr = ISDestroy(&col);CHKERRQ(ierr); 7765 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7766 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7767 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7768 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7769 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7770 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7771 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7772 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7773 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7774 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7775 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7776 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7777 } 7778 } 7779 7780 /* propagate symmetry info of coarse matrix */ 7781 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7782 if (pc->pmat->symmetric_set) { 7783 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7784 } 7785 if (pc->pmat->hermitian_set) { 7786 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7787 } 7788 if (pc->pmat->spd_set) { 7789 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7790 } 7791 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7792 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7793 } 7794 /* set operators */ 7795 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7796 if (pcbddc->dbg_flag) { 7797 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7798 } 7799 } 7800 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 7801 ierr = PetscFree(isarray);CHKERRQ(ierr); 7802 #if 0 7803 { 7804 PetscViewer viewer; 7805 char filename[256]; 7806 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7807 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7808 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7809 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7810 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7811 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7812 } 7813 #endif 7814 7815 if (pcbddc->coarse_ksp) { 7816 Vec crhs,csol; 7817 7818 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7819 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7820 if (!csol) { 7821 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7822 } 7823 if (!crhs) { 7824 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7825 } 7826 } 7827 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7828 7829 /* compute null space for coarse solver if the benign trick has been requested */ 7830 if (pcbddc->benign_null) { 7831 7832 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7833 for (i=0;i<pcbddc->benign_n;i++) { 7834 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7835 } 7836 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7837 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7838 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7839 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7840 if (coarse_mat) { 7841 Vec nullv; 7842 PetscScalar *array,*array2; 7843 PetscInt nl; 7844 7845 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7846 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7847 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7848 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7849 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7850 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7851 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7852 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7853 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7854 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7855 } 7856 } 7857 7858 if (pcbddc->coarse_ksp) { 7859 PetscBool ispreonly; 7860 7861 if (CoarseNullSpace) { 7862 PetscBool isnull; 7863 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7864 if (isnull) { 7865 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7866 } 7867 /* TODO: add local nullspaces (if any) */ 7868 } 7869 /* setup coarse ksp */ 7870 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7871 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7872 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7873 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7874 KSP check_ksp; 7875 KSPType check_ksp_type; 7876 PC check_pc; 7877 Vec check_vec,coarse_vec; 7878 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7879 PetscInt its; 7880 PetscBool compute_eigs; 7881 PetscReal *eigs_r,*eigs_c; 7882 PetscInt neigs; 7883 const char *prefix; 7884 7885 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7886 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7887 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7888 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7889 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7890 /* prevent from setup unneeded object */ 7891 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7892 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7893 if (ispreonly) { 7894 check_ksp_type = KSPPREONLY; 7895 compute_eigs = PETSC_FALSE; 7896 } else { 7897 check_ksp_type = KSPGMRES; 7898 compute_eigs = PETSC_TRUE; 7899 } 7900 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7901 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7902 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7903 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7904 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7905 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7906 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7907 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7908 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7909 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7910 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7911 /* create random vec */ 7912 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7913 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7914 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7915 /* solve coarse problem */ 7916 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7917 /* set eigenvalue estimation if preonly has not been requested */ 7918 if (compute_eigs) { 7919 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7920 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7921 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7922 if (neigs) { 7923 lambda_max = eigs_r[neigs-1]; 7924 lambda_min = eigs_r[0]; 7925 if (pcbddc->use_coarse_estimates) { 7926 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7927 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7928 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7929 } 7930 } 7931 } 7932 } 7933 7934 /* check coarse problem residual error */ 7935 if (pcbddc->dbg_flag) { 7936 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7937 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7938 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7939 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7940 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7941 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7942 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7943 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7944 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7945 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7946 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7947 if (CoarseNullSpace) { 7948 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7949 } 7950 if (compute_eigs) { 7951 PetscReal lambda_max_s,lambda_min_s; 7952 KSPConvergedReason reason; 7953 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7954 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7955 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 7956 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7957 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7958 for (i=0;i<neigs;i++) { 7959 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7960 } 7961 } 7962 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7963 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7964 } 7965 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7966 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7967 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7968 if (compute_eigs) { 7969 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7970 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7971 } 7972 } 7973 } 7974 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7975 /* print additional info */ 7976 if (pcbddc->dbg_flag) { 7977 /* waits until all processes reaches this point */ 7978 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7979 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7980 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7981 } 7982 7983 /* free memory */ 7984 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7985 PetscFunctionReturn(0); 7986 } 7987 7988 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7989 { 7990 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7991 PC_IS* pcis = (PC_IS*)pc->data; 7992 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7993 IS subset,subset_mult,subset_n; 7994 PetscInt local_size,coarse_size=0; 7995 PetscInt *local_primal_indices=NULL; 7996 const PetscInt *t_local_primal_indices; 7997 PetscErrorCode ierr; 7998 7999 PetscFunctionBegin; 8000 /* Compute global number of coarse dofs */ 8001 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8002 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8003 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8004 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8005 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8006 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8007 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8008 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8009 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8010 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8011 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8012 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8013 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 8014 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8015 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8016 8017 /* check numbering */ 8018 if (pcbddc->dbg_flag) { 8019 PetscScalar coarsesum,*array,*array2; 8020 PetscInt i; 8021 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8022 8023 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8024 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8025 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8026 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8027 /* counter */ 8028 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8029 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8030 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8031 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8032 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8033 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8034 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8035 for (i=0;i<pcbddc->local_primal_size;i++) { 8036 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8037 } 8038 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8039 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8040 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8041 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8042 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8043 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8044 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8045 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8046 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8047 for (i=0;i<pcis->n;i++) { 8048 if (array[i] != 0.0 && array[i] != array2[i]) { 8049 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8050 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8051 set_error = PETSC_TRUE; 8052 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8053 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8054 } 8055 } 8056 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8057 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8058 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8059 for (i=0;i<pcis->n;i++) { 8060 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8061 } 8062 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8063 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8064 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8065 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8066 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8067 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8068 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8069 PetscInt *gidxs; 8070 8071 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8072 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8073 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8074 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8075 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8076 for (i=0;i<pcbddc->local_primal_size;i++) { 8077 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8078 } 8079 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8080 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8081 } 8082 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8083 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8084 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8085 } 8086 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 8087 /* get back data */ 8088 *coarse_size_n = coarse_size; 8089 *local_primal_indices_n = local_primal_indices; 8090 PetscFunctionReturn(0); 8091 } 8092 8093 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8094 { 8095 IS localis_t; 8096 PetscInt i,lsize,*idxs,n; 8097 PetscScalar *vals; 8098 PetscErrorCode ierr; 8099 8100 PetscFunctionBegin; 8101 /* get indices in local ordering exploiting local to global map */ 8102 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8103 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8104 for (i=0;i<lsize;i++) vals[i] = 1.0; 8105 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8106 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8107 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8108 if (idxs) { /* multilevel guard */ 8109 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8110 } 8111 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8112 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8113 ierr = PetscFree(vals);CHKERRQ(ierr); 8114 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8115 /* now compute set in local ordering */ 8116 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8117 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8118 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8119 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8120 for (i=0,lsize=0;i<n;i++) { 8121 if (PetscRealPart(vals[i]) > 0.5) { 8122 lsize++; 8123 } 8124 } 8125 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8126 for (i=0,lsize=0;i<n;i++) { 8127 if (PetscRealPart(vals[i]) > 0.5) { 8128 idxs[lsize++] = i; 8129 } 8130 } 8131 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8132 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8133 *localis = localis_t; 8134 PetscFunctionReturn(0); 8135 } 8136 8137 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8138 { 8139 PC_IS *pcis=(PC_IS*)pc->data; 8140 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8141 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8142 Mat S_j; 8143 PetscInt *used_xadj,*used_adjncy; 8144 PetscBool free_used_adj; 8145 PetscErrorCode ierr; 8146 8147 PetscFunctionBegin; 8148 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8149 free_used_adj = PETSC_FALSE; 8150 if (pcbddc->sub_schurs_layers == -1) { 8151 used_xadj = NULL; 8152 used_adjncy = NULL; 8153 } else { 8154 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8155 used_xadj = pcbddc->mat_graph->xadj; 8156 used_adjncy = pcbddc->mat_graph->adjncy; 8157 } else if (pcbddc->computed_rowadj) { 8158 used_xadj = pcbddc->mat_graph->xadj; 8159 used_adjncy = pcbddc->mat_graph->adjncy; 8160 } else { 8161 PetscBool flg_row=PETSC_FALSE; 8162 const PetscInt *xadj,*adjncy; 8163 PetscInt nvtxs; 8164 8165 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8166 if (flg_row) { 8167 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8168 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8169 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8170 free_used_adj = PETSC_TRUE; 8171 } else { 8172 pcbddc->sub_schurs_layers = -1; 8173 used_xadj = NULL; 8174 used_adjncy = NULL; 8175 } 8176 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8177 } 8178 } 8179 8180 /* setup sub_schurs data */ 8181 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8182 if (!sub_schurs->schur_explicit) { 8183 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8184 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8185 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8186 } else { 8187 Mat change = NULL; 8188 Vec scaling = NULL; 8189 IS change_primal = NULL, iP; 8190 PetscInt benign_n; 8191 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8192 PetscBool isseqaij,need_change = PETSC_FALSE; 8193 PetscBool discrete_harmonic = PETSC_FALSE; 8194 8195 if (!pcbddc->use_vertices && reuse_solvers) { 8196 PetscInt n_vertices; 8197 8198 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8199 reuse_solvers = (PetscBool)!n_vertices; 8200 } 8201 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8202 if (!isseqaij) { 8203 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8204 if (matis->A == pcbddc->local_mat) { 8205 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8206 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8207 } else { 8208 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8209 } 8210 } 8211 if (!pcbddc->benign_change_explicit) { 8212 benign_n = pcbddc->benign_n; 8213 } else { 8214 benign_n = 0; 8215 } 8216 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8217 We need a global reduction to avoid possible deadlocks. 8218 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8219 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8220 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8221 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8222 need_change = (PetscBool)(!need_change); 8223 } 8224 /* If the user defines additional constraints, we import them here. 8225 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8226 if (need_change) { 8227 PC_IS *pcisf; 8228 PC_BDDC *pcbddcf; 8229 PC pcf; 8230 8231 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8232 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8233 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8234 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8235 8236 /* hacks */ 8237 pcisf = (PC_IS*)pcf->data; 8238 pcisf->is_B_local = pcis->is_B_local; 8239 pcisf->vec1_N = pcis->vec1_N; 8240 pcisf->BtoNmap = pcis->BtoNmap; 8241 pcisf->n = pcis->n; 8242 pcisf->n_B = pcis->n_B; 8243 pcbddcf = (PC_BDDC*)pcf->data; 8244 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8245 pcbddcf->mat_graph = pcbddc->mat_graph; 8246 pcbddcf->use_faces = PETSC_TRUE; 8247 pcbddcf->use_change_of_basis = PETSC_TRUE; 8248 pcbddcf->use_change_on_faces = PETSC_TRUE; 8249 pcbddcf->use_qr_single = PETSC_TRUE; 8250 pcbddcf->fake_change = PETSC_TRUE; 8251 8252 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 8253 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8254 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8255 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8256 change = pcbddcf->ConstraintMatrix; 8257 pcbddcf->ConstraintMatrix = NULL; 8258 8259 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8260 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8261 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8262 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8263 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8264 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8265 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8266 pcf->ops->destroy = NULL; 8267 pcf->ops->reset = NULL; 8268 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8269 } 8270 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8271 8272 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 8273 if (iP) { 8274 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 8275 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 8276 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8277 } 8278 if (discrete_harmonic) { 8279 Mat A; 8280 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 8281 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 8282 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 8283 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8284 ierr = MatDestroy(&A);CHKERRQ(ierr); 8285 } else { 8286 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8287 } 8288 ierr = MatDestroy(&change);CHKERRQ(ierr); 8289 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8290 } 8291 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8292 8293 /* free adjacency */ 8294 if (free_used_adj) { 8295 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8296 } 8297 PetscFunctionReturn(0); 8298 } 8299 8300 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8301 { 8302 PC_IS *pcis=(PC_IS*)pc->data; 8303 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8304 PCBDDCGraph graph; 8305 PetscErrorCode ierr; 8306 8307 PetscFunctionBegin; 8308 /* attach interface graph for determining subsets */ 8309 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8310 IS verticesIS,verticescomm; 8311 PetscInt vsize,*idxs; 8312 8313 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8314 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8315 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8316 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8317 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8318 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8319 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8320 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8321 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8322 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8323 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8324 } else { 8325 graph = pcbddc->mat_graph; 8326 } 8327 /* print some info */ 8328 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8329 IS vertices; 8330 PetscInt nv,nedges,nfaces; 8331 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8332 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8333 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8334 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8335 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8336 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8337 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8338 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8339 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8340 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8341 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8342 } 8343 8344 /* sub_schurs init */ 8345 if (!pcbddc->sub_schurs) { 8346 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8347 } 8348 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8349 pcbddc->sub_schurs->prefix = ((PetscObject)pc)->prefix; 8350 8351 /* free graph struct */ 8352 if (pcbddc->sub_schurs_rebuild) { 8353 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8354 } 8355 PetscFunctionReturn(0); 8356 } 8357 8358 PetscErrorCode PCBDDCCheckOperator(PC pc) 8359 { 8360 PC_IS *pcis=(PC_IS*)pc->data; 8361 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8362 PetscErrorCode ierr; 8363 8364 PetscFunctionBegin; 8365 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8366 IS zerodiag = NULL; 8367 Mat S_j,B0_B=NULL; 8368 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8369 PetscScalar *p0_check,*array,*array2; 8370 PetscReal norm; 8371 PetscInt i; 8372 8373 /* B0 and B0_B */ 8374 if (zerodiag) { 8375 IS dummy; 8376 8377 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8378 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8379 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8380 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8381 } 8382 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8383 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8384 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8385 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8386 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8387 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8388 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8389 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8390 /* S_j */ 8391 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8392 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8393 8394 /* mimic vector in \widetilde{W}_\Gamma */ 8395 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8396 /* continuous in primal space */ 8397 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8398 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8399 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8400 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8401 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8402 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8403 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8404 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8405 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8406 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8407 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8408 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8409 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8410 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8411 8412 /* assemble rhs for coarse problem */ 8413 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8414 /* local with Schur */ 8415 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8416 if (zerodiag) { 8417 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8418 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8419 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8420 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8421 } 8422 /* sum on primal nodes the local contributions */ 8423 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8424 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8425 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8426 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8427 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8428 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8429 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8430 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8431 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8432 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8433 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8434 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8435 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8436 /* scale primal nodes (BDDC sums contibutions) */ 8437 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8438 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8439 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8440 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8441 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8442 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8443 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8444 /* global: \widetilde{B0}_B w_\Gamma */ 8445 if (zerodiag) { 8446 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8447 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8448 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8449 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8450 } 8451 /* BDDC */ 8452 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8453 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8454 8455 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8456 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8457 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8458 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8459 for (i=0;i<pcbddc->benign_n;i++) { 8460 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8461 } 8462 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8463 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8464 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8465 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8466 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8467 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8468 } 8469 PetscFunctionReturn(0); 8470 } 8471 8472 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8473 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8474 { 8475 Mat At; 8476 IS rows; 8477 PetscInt rst,ren; 8478 PetscErrorCode ierr; 8479 PetscLayout rmap; 8480 8481 PetscFunctionBegin; 8482 rst = ren = 0; 8483 if (ccomm != MPI_COMM_NULL) { 8484 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8485 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8486 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8487 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8488 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8489 } 8490 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8491 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8492 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8493 8494 if (ccomm != MPI_COMM_NULL) { 8495 Mat_MPIAIJ *a,*b; 8496 IS from,to; 8497 Vec gvec; 8498 PetscInt lsize; 8499 8500 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8501 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8502 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8503 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8504 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8505 a = (Mat_MPIAIJ*)At->data; 8506 b = (Mat_MPIAIJ*)(*B)->data; 8507 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8508 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8509 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8510 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8511 b->A = a->A; 8512 b->B = a->B; 8513 8514 b->donotstash = a->donotstash; 8515 b->roworiented = a->roworiented; 8516 b->rowindices = 0; 8517 b->rowvalues = 0; 8518 b->getrowactive = PETSC_FALSE; 8519 8520 (*B)->rmap = rmap; 8521 (*B)->factortype = A->factortype; 8522 (*B)->assembled = PETSC_TRUE; 8523 (*B)->insertmode = NOT_SET_VALUES; 8524 (*B)->preallocated = PETSC_TRUE; 8525 8526 if (a->colmap) { 8527 #if defined(PETSC_USE_CTABLE) 8528 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8529 #else 8530 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8531 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8532 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8533 #endif 8534 } else b->colmap = 0; 8535 if (a->garray) { 8536 PetscInt len; 8537 len = a->B->cmap->n; 8538 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8539 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8540 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8541 } else b->garray = 0; 8542 8543 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8544 b->lvec = a->lvec; 8545 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8546 8547 /* cannot use VecScatterCopy */ 8548 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8549 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8550 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8551 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8552 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8553 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8554 ierr = ISDestroy(&from);CHKERRQ(ierr); 8555 ierr = ISDestroy(&to);CHKERRQ(ierr); 8556 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8557 } 8558 ierr = MatDestroy(&At);CHKERRQ(ierr); 8559 PetscFunctionReturn(0); 8560 } 8561