1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscdmplex.h> 5 #include <petscblaslapack.h> 6 #include <petsc/private/sfimpl.h> 7 #include <petsc/private/dmpleximpl.h> 8 #include <petscdmda.h> 9 10 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 11 12 /* if range is true, it returns B s.t. span{B} = range(A) 13 if range is false, it returns B s.t. range(B) _|_ range(A) */ 14 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 15 { 16 #if !defined(PETSC_USE_COMPLEX) 17 PetscScalar *uwork,*data,*U, ds = 0.; 18 PetscReal *sing; 19 PetscBLASInt bM,bN,lwork,lierr,di = 1; 20 PetscInt ulw,i,nr,nc,n; 21 PetscErrorCode ierr; 22 23 PetscFunctionBegin; 24 #if defined(PETSC_MISSING_LAPACK_GESVD) 25 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 26 #else 27 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 28 if (!nr || !nc) PetscFunctionReturn(0); 29 30 /* workspace */ 31 if (!work) { 32 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 33 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 34 } else { 35 ulw = lw; 36 uwork = work; 37 } 38 n = PetscMin(nr,nc); 39 if (!rwork) { 40 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 41 } else { 42 sing = rwork; 43 } 44 45 /* SVD */ 46 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 47 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 49 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 50 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 51 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 52 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 53 ierr = PetscFPTrapPop();CHKERRQ(ierr); 54 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 55 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 56 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 57 if (!rwork) { 58 ierr = PetscFree(sing);CHKERRQ(ierr); 59 } 60 if (!work) { 61 ierr = PetscFree(uwork);CHKERRQ(ierr); 62 } 63 /* create B */ 64 if (!range) { 65 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 66 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 67 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 68 } else { 69 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 70 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 71 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 72 } 73 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 74 ierr = PetscFree(U);CHKERRQ(ierr); 75 #endif 76 #else /* PETSC_USE_COMPLEX */ 77 PetscFunctionBegin; 78 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 79 #endif 80 PetscFunctionReturn(0); 81 } 82 83 /* TODO REMOVE */ 84 #if defined(PRINT_GDET) 85 static int inc = 0; 86 static int lev = 0; 87 #endif 88 89 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 90 { 91 PetscErrorCode ierr; 92 Mat GE,GEd; 93 PetscInt rsize,csize,esize; 94 PetscScalar *ptr; 95 96 PetscFunctionBegin; 97 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 98 if (!esize) PetscFunctionReturn(0); 99 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 100 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 101 102 /* gradients */ 103 ptr = work + 5*esize; 104 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 105 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 106 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 107 ierr = MatDestroy(&GE);CHKERRQ(ierr); 108 109 /* constants */ 110 ptr += rsize*csize; 111 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 112 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 113 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 114 ierr = MatDestroy(&GE);CHKERRQ(ierr); 115 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 116 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 117 118 if (corners) { 119 Mat GEc; 120 PetscScalar *vals,v; 121 122 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 123 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 124 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 125 /* v = PetscAbsScalar(vals[0]) */; 126 v = 1.; 127 cvals[0] = vals[0]/v; 128 cvals[1] = vals[1]/v; 129 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 130 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 131 #if defined(PRINT_GDET) 132 { 133 PetscViewer viewer; 134 char filename[256]; 135 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 136 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 137 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 138 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 139 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 140 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 141 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 142 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 143 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 144 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 145 } 146 #endif 147 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 148 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 149 } 150 151 PetscFunctionReturn(0); 152 } 153 154 PetscErrorCode PCBDDCNedelecSupport(PC pc) 155 { 156 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 157 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 158 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 159 Vec tvec; 160 PetscSF sfv; 161 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 162 MPI_Comm comm; 163 IS lned,primals,allprimals,nedfieldlocal; 164 IS *eedges,*extrows,*extcols,*alleedges; 165 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 166 PetscScalar *vals,*work; 167 PetscReal *rwork; 168 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 169 PetscInt ne,nv,Lv,order,n,field; 170 PetscInt n_neigh,*neigh,*n_shared,**shared; 171 PetscInt i,j,extmem,cum,maxsize,nee; 172 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 173 PetscInt *sfvleaves,*sfvroots; 174 PetscInt *corners,*cedges; 175 PetscInt *ecount,**eneighs,*vcount,**vneighs; 176 #if defined(PETSC_USE_DEBUG) 177 PetscInt *emarks; 178 #endif 179 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 180 PetscErrorCode ierr; 181 182 PetscFunctionBegin; 183 /* If the discrete gradient is defined for a subset of dofs and global is true, 184 it assumes G is given in global ordering for all the dofs. 185 Otherwise, the ordering is global for the Nedelec field */ 186 order = pcbddc->nedorder; 187 conforming = pcbddc->conforming; 188 field = pcbddc->nedfield; 189 global = pcbddc->nedglobal; 190 setprimal = PETSC_FALSE; 191 print = PETSC_FALSE; 192 singular = PETSC_FALSE; 193 194 /* Command line customization */ 195 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 196 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 197 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 198 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 199 /* print debug info TODO: to be removed */ 200 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 201 ierr = PetscOptionsEnd();CHKERRQ(ierr); 202 203 /* Return if there are no edges in the decomposition and the problem is not singular */ 204 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 205 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 206 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 207 if (!singular) { 208 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 209 lrc[0] = PETSC_FALSE; 210 for (i=0;i<n;i++) { 211 if (PetscRealPart(vals[i]) > 2.) { 212 lrc[0] = PETSC_TRUE; 213 break; 214 } 215 } 216 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 217 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 218 if (!lrc[1]) PetscFunctionReturn(0); 219 } 220 221 /* Get Nedelec field */ 222 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 223 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 224 if (pcbddc->n_ISForDofsLocal && field >= 0) { 225 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 226 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 227 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 228 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 229 ne = n; 230 nedfieldlocal = NULL; 231 global = PETSC_TRUE; 232 } else if (field == PETSC_DECIDE) { 233 PetscInt rst,ren,*idx; 234 235 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 236 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 237 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 238 for (i=rst;i<ren;i++) { 239 PetscInt nc; 240 241 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 242 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 243 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 244 } 245 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 246 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 247 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 248 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 249 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 250 } else { 251 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 252 } 253 254 /* Sanity checks */ 255 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 256 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 257 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 258 259 /* Just set primal dofs and return */ 260 if (setprimal) { 261 IS enedfieldlocal; 262 PetscInt *eidxs; 263 264 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 265 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 266 if (nedfieldlocal) { 267 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 268 for (i=0,cum=0;i<ne;i++) { 269 if (PetscRealPart(vals[idxs[i]]) > 2.) { 270 eidxs[cum++] = idxs[i]; 271 } 272 } 273 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 274 } else { 275 for (i=0,cum=0;i<ne;i++) { 276 if (PetscRealPart(vals[i]) > 2.) { 277 eidxs[cum++] = i; 278 } 279 } 280 } 281 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 282 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 283 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 284 ierr = PetscFree(eidxs);CHKERRQ(ierr); 285 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 286 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 287 PetscFunctionReturn(0); 288 } 289 290 /* Compute some l2g maps */ 291 if (nedfieldlocal) { 292 IS is; 293 294 /* need to map from the local Nedelec field to local numbering */ 295 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 296 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 297 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 298 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 299 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 300 if (global) { 301 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 302 el2g = al2g; 303 } else { 304 IS gis; 305 306 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 307 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 308 ierr = ISDestroy(&gis);CHKERRQ(ierr); 309 } 310 ierr = ISDestroy(&is);CHKERRQ(ierr); 311 } else { 312 /* restore default */ 313 pcbddc->nedfield = -1; 314 /* one ref for the destruction of al2g, one for el2g */ 315 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 316 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 317 el2g = al2g; 318 fl2g = NULL; 319 } 320 321 /* Start communication to drop connections for interior edges (for cc analysis only) */ 322 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 323 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 324 if (nedfieldlocal) { 325 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 326 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 327 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 328 } else { 329 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 330 } 331 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 332 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 333 334 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 335 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 336 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 337 if (global) { 338 PetscInt rst; 339 340 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 341 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 342 if (matis->sf_rootdata[i] < 2) { 343 matis->sf_rootdata[cum++] = i + rst; 344 } 345 } 346 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 347 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 348 } else { 349 PetscInt *tbz; 350 351 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 352 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 353 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 354 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 355 for (i=0,cum=0;i<ne;i++) 356 if (matis->sf_leafdata[idxs[i]] == 1) 357 tbz[cum++] = i; 358 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 359 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 360 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 361 ierr = PetscFree(tbz);CHKERRQ(ierr); 362 } 363 } else { /* we need the entire G to infer the nullspace */ 364 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 365 G = pcbddc->discretegradient; 366 } 367 368 /* Extract subdomain relevant rows of G */ 369 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 370 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 371 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 372 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 373 ierr = ISDestroy(&lned);CHKERRQ(ierr); 374 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 375 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 376 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 377 378 /* SF for nodal dofs communications */ 379 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 380 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 381 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 382 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 383 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 384 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 385 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 386 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 387 i = singular ? 2 : 1; 388 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 389 390 /* Destroy temporary G created in MATIS format and modified G */ 391 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 392 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 393 ierr = MatDestroy(&G);CHKERRQ(ierr); 394 395 if (print) { 396 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 397 ierr = MatView(lG,NULL);CHKERRQ(ierr); 398 } 399 400 /* Save lG for values insertion in change of basis */ 401 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 402 403 /* Analyze the edge-nodes connections (duplicate lG) */ 404 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 405 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 406 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 407 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 408 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 409 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 410 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 411 /* need to import the boundary specification to ensure the 412 proper detection of coarse edges' endpoints */ 413 if (pcbddc->DirichletBoundariesLocal) { 414 IS is; 415 416 if (fl2g) { 417 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 418 } else { 419 is = pcbddc->DirichletBoundariesLocal; 420 } 421 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 422 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 423 for (i=0;i<cum;i++) { 424 if (idxs[i] >= 0) { 425 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 426 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 427 } 428 } 429 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 430 if (fl2g) { 431 ierr = ISDestroy(&is);CHKERRQ(ierr); 432 } 433 } 434 if (pcbddc->NeumannBoundariesLocal) { 435 IS is; 436 437 if (fl2g) { 438 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 439 } else { 440 is = pcbddc->NeumannBoundariesLocal; 441 } 442 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 443 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 444 for (i=0;i<cum;i++) { 445 if (idxs[i] >= 0) { 446 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 447 } 448 } 449 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 450 if (fl2g) { 451 ierr = ISDestroy(&is);CHKERRQ(ierr); 452 } 453 } 454 455 /* Count neighs per dof */ 456 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 457 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 458 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 459 for (i=1,cum=0;i<n_neigh;i++) { 460 cum += n_shared[i]; 461 for (j=0;j<n_shared[i];j++) { 462 ecount[shared[i][j]]++; 463 } 464 } 465 if (ne) { 466 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 467 } 468 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 469 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 470 for (i=1;i<n_neigh;i++) { 471 for (j=0;j<n_shared[i];j++) { 472 PetscInt k = shared[i][j]; 473 eneighs[k][ecount[k]] = neigh[i]; 474 ecount[k]++; 475 } 476 } 477 for (i=0;i<ne;i++) { 478 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 479 } 480 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 481 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 482 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 483 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 484 for (i=1,cum=0;i<n_neigh;i++) { 485 cum += n_shared[i]; 486 for (j=0;j<n_shared[i];j++) { 487 vcount[shared[i][j]]++; 488 } 489 } 490 if (nv) { 491 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 492 } 493 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 494 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 495 for (i=1;i<n_neigh;i++) { 496 for (j=0;j<n_shared[i];j++) { 497 PetscInt k = shared[i][j]; 498 vneighs[k][vcount[k]] = neigh[i]; 499 vcount[k]++; 500 } 501 } 502 for (i=0;i<nv;i++) { 503 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 504 } 505 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 506 507 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 508 for proper detection of coarse edges' endpoints */ 509 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 510 for (i=0;i<ne;i++) { 511 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 512 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 513 } 514 } 515 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 516 if (!conforming) { 517 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 518 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 519 } 520 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 521 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 522 cum = 0; 523 for (i=0;i<ne;i++) { 524 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 525 if (!PetscBTLookup(btee,i)) { 526 marks[cum++] = i; 527 continue; 528 } 529 /* set badly connected edge dofs as primal */ 530 if (!conforming) { 531 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 532 marks[cum++] = i; 533 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 534 for (j=ii[i];j<ii[i+1];j++) { 535 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 536 } 537 } else { 538 /* every edge dofs should be connected trough a certain number of nodal dofs 539 to other edge dofs belonging to coarse edges 540 - at most 2 endpoints 541 - order-1 interior nodal dofs 542 - no undefined nodal dofs (nconn < order) 543 */ 544 PetscInt ends = 0,ints = 0, undef = 0; 545 for (j=ii[i];j<ii[i+1];j++) { 546 PetscInt v = jj[j],k; 547 PetscInt nconn = iit[v+1]-iit[v]; 548 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 549 if (nconn > order) ends++; 550 else if (nconn == order) ints++; 551 else undef++; 552 } 553 if (undef || ends > 2 || ints != order -1) { 554 marks[cum++] = i; 555 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 556 for (j=ii[i];j<ii[i+1];j++) { 557 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 558 } 559 } 560 } 561 } 562 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 563 if (!order && ii[i+1] != ii[i]) { 564 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 565 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 566 } 567 } 568 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 569 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 570 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 571 if (!conforming) { 572 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 573 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 574 } 575 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 576 577 /* identify splitpoints and corner candidates */ 578 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 579 if (print) { 580 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 581 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 582 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 583 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 584 } 585 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 586 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 587 for (i=0;i<nv;i++) { 588 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 589 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 590 if (!order) { /* variable order */ 591 PetscReal vorder = 0.; 592 593 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 594 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 595 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 596 ord = 1; 597 } 598 #if defined(PETSC_USE_DEBUG) 599 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 600 #endif 601 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 602 if (PetscBTLookup(btbd,jj[j])) { 603 bdir = PETSC_TRUE; 604 break; 605 } 606 if (vc != ecount[jj[j]]) { 607 sneighs = PETSC_FALSE; 608 } else { 609 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 610 for (k=0;k<vc;k++) { 611 if (vn[k] != en[k]) { 612 sneighs = PETSC_FALSE; 613 break; 614 } 615 } 616 } 617 } 618 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 619 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 620 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 621 } else if (test == ord) { 622 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 623 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 624 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 625 } else { 626 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 627 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 628 } 629 } 630 } 631 ierr = PetscFree(ecount);CHKERRQ(ierr); 632 ierr = PetscFree(vcount);CHKERRQ(ierr); 633 if (ne) { 634 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 635 } 636 if (nv) { 637 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 638 } 639 ierr = PetscFree(eneighs);CHKERRQ(ierr); 640 ierr = PetscFree(vneighs);CHKERRQ(ierr); 641 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 642 643 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 644 if (order != 1) { 645 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 646 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 647 for (i=0;i<nv;i++) { 648 if (PetscBTLookup(btvcand,i)) { 649 PetscBool found = PETSC_FALSE; 650 for (j=ii[i];j<ii[i+1] && !found;j++) { 651 PetscInt k,e = jj[j]; 652 if (PetscBTLookup(bte,e)) continue; 653 for (k=iit[e];k<iit[e+1];k++) { 654 PetscInt v = jjt[k]; 655 if (v != i && PetscBTLookup(btvcand,v)) { 656 found = PETSC_TRUE; 657 break; 658 } 659 } 660 } 661 if (!found) { 662 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 663 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 664 } else { 665 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 666 } 667 } 668 } 669 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 670 } 671 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 672 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 673 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 674 675 /* Get the local G^T explicitly */ 676 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 677 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 678 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 679 680 /* Mark interior nodal dofs */ 681 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 682 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 683 for (i=1;i<n_neigh;i++) { 684 for (j=0;j<n_shared[i];j++) { 685 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 686 } 687 } 688 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 689 690 /* communicate corners and splitpoints */ 691 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 692 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 693 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 694 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 695 696 if (print) { 697 IS tbz; 698 699 cum = 0; 700 for (i=0;i<nv;i++) 701 if (sfvleaves[i]) 702 vmarks[cum++] = i; 703 704 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 705 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 706 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 707 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 708 } 709 710 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 711 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 712 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 713 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 714 715 /* Zero rows of lGt corresponding to identified corners 716 and interior nodal dofs */ 717 cum = 0; 718 for (i=0;i<nv;i++) { 719 if (sfvleaves[i]) { 720 vmarks[cum++] = i; 721 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 722 } 723 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 724 } 725 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 726 if (print) { 727 IS tbz; 728 729 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 730 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 731 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 732 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 733 } 734 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 735 ierr = PetscFree(vmarks);CHKERRQ(ierr); 736 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 737 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 738 739 /* Recompute G */ 740 ierr = MatDestroy(&lG);CHKERRQ(ierr); 741 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 742 if (print) { 743 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 744 ierr = MatView(lG,NULL);CHKERRQ(ierr); 745 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 746 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 747 } 748 749 /* Get primal dofs (if any) */ 750 cum = 0; 751 for (i=0;i<ne;i++) { 752 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 753 } 754 if (fl2g) { 755 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 756 } 757 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 758 if (print) { 759 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 760 ierr = ISView(primals,NULL);CHKERRQ(ierr); 761 } 762 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 763 /* TODO: what if the user passed in some of them ? */ 764 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 765 ierr = ISDestroy(&primals);CHKERRQ(ierr); 766 767 /* Compute edge connectivity */ 768 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 769 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 770 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 771 if (fl2g) { 772 PetscBT btf; 773 PetscInt *iia,*jja,*iiu,*jju; 774 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 775 776 /* create CSR for all local dofs */ 777 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 778 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 779 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 780 iiu = pcbddc->mat_graph->xadj; 781 jju = pcbddc->mat_graph->adjncy; 782 } else if (pcbddc->use_local_adj) { 783 rest = PETSC_TRUE; 784 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 785 } else { 786 free = PETSC_TRUE; 787 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 788 iiu[0] = 0; 789 for (i=0;i<n;i++) { 790 iiu[i+1] = i+1; 791 jju[i] = -1; 792 } 793 } 794 795 /* import sizes of CSR */ 796 iia[0] = 0; 797 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 798 799 /* overwrite entries corresponding to the Nedelec field */ 800 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 801 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 802 for (i=0;i<ne;i++) { 803 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 804 iia[idxs[i]+1] = ii[i+1]-ii[i]; 805 } 806 807 /* iia in CSR */ 808 for (i=0;i<n;i++) iia[i+1] += iia[i]; 809 810 /* jja in CSR */ 811 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 812 for (i=0;i<n;i++) 813 if (!PetscBTLookup(btf,i)) 814 for (j=0;j<iiu[i+1]-iiu[i];j++) 815 jja[iia[i]+j] = jju[iiu[i]+j]; 816 817 /* map edge dofs connectivity */ 818 if (jj) { 819 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 820 for (i=0;i<ne;i++) { 821 PetscInt e = idxs[i]; 822 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 823 } 824 } 825 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 826 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 827 if (rest) { 828 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 829 } 830 if (free) { 831 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 832 } 833 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 834 } else { 835 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 836 } 837 838 /* Analyze interface for edge dofs */ 839 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 840 pcbddc->mat_graph->twodim = PETSC_FALSE; 841 842 /* Get coarse edges in the edge space */ 843 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 844 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 845 846 if (fl2g) { 847 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 848 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 849 for (i=0;i<nee;i++) { 850 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 851 } 852 } else { 853 eedges = alleedges; 854 primals = allprimals; 855 } 856 857 /* Mark fine edge dofs with their coarse edge id */ 858 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 859 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 860 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 861 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 862 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 863 if (print) { 864 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 865 ierr = ISView(primals,NULL);CHKERRQ(ierr); 866 } 867 868 maxsize = 0; 869 for (i=0;i<nee;i++) { 870 PetscInt size,mark = i+1; 871 872 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 873 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 874 for (j=0;j<size;j++) marks[idxs[j]] = mark; 875 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 876 maxsize = PetscMax(maxsize,size); 877 } 878 879 /* Find coarse edge endpoints */ 880 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 881 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 882 for (i=0;i<nee;i++) { 883 PetscInt mark = i+1,size; 884 885 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 886 if (!size && nedfieldlocal) continue; 887 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 888 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 889 if (print) { 890 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 891 ISView(eedges[i],NULL); 892 } 893 for (j=0;j<size;j++) { 894 PetscInt k, ee = idxs[j]; 895 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 896 for (k=ii[ee];k<ii[ee+1];k++) { 897 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 898 if (PetscBTLookup(btv,jj[k])) { 899 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 900 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 901 PetscInt k2; 902 PetscBool corner = PETSC_FALSE; 903 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 904 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 905 /* it's a corner if either is connected with an edge dof belonging to a different cc or 906 if the edge dof lie on the natural part of the boundary */ 907 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 908 corner = PETSC_TRUE; 909 break; 910 } 911 } 912 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 913 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 914 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 915 } else { 916 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 917 } 918 } 919 } 920 } 921 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 922 } 923 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 924 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 925 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 926 927 /* Reset marked primal dofs */ 928 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 929 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 930 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 931 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 932 933 /* Now use the initial lG */ 934 ierr = MatDestroy(&lG);CHKERRQ(ierr); 935 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 936 lG = lGinit; 937 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 938 939 /* Compute extended cols indices */ 940 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 941 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 942 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 943 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 944 i *= maxsize; 945 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 946 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 947 eerr = PETSC_FALSE; 948 for (i=0;i<nee;i++) { 949 PetscInt size,found = 0; 950 951 cum = 0; 952 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 953 if (!size && nedfieldlocal) continue; 954 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 955 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 956 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 957 for (j=0;j<size;j++) { 958 PetscInt k,ee = idxs[j]; 959 for (k=ii[ee];k<ii[ee+1];k++) { 960 PetscInt vv = jj[k]; 961 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 962 else if (!PetscBTLookupSet(btvc,vv)) found++; 963 } 964 } 965 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 966 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 967 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 968 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 969 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 970 /* it may happen that endpoints are not defined at this point 971 if it is the case, mark this edge for a second pass */ 972 if (cum != size -1 || found != 2) { 973 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 974 if (print) { 975 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 976 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 977 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 978 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 979 } 980 eerr = PETSC_TRUE; 981 } 982 } 983 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 984 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 985 if (done) { 986 PetscInt *newprimals; 987 988 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 989 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 990 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 991 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 992 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 993 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 994 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 995 for (i=0;i<nee;i++) { 996 PetscBool has_candidates = PETSC_FALSE; 997 if (PetscBTLookup(bter,i)) { 998 PetscInt size,mark = i+1; 999 1000 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1001 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1002 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1003 for (j=0;j<size;j++) { 1004 PetscInt k,ee = idxs[j]; 1005 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 1006 for (k=ii[ee];k<ii[ee+1];k++) { 1007 /* set all candidates located on the edge as corners */ 1008 if (PetscBTLookup(btvcand,jj[k])) { 1009 PetscInt k2,vv = jj[k]; 1010 has_candidates = PETSC_TRUE; 1011 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 1012 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 1013 /* set all edge dofs connected to candidate as primals */ 1014 for (k2=iit[vv];k2<iit[vv+1];k2++) { 1015 if (marks[jjt[k2]] == mark) { 1016 PetscInt k3,ee2 = jjt[k2]; 1017 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 1018 newprimals[cum++] = ee2; 1019 /* finally set the new corners */ 1020 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 1021 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1022 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1023 } 1024 } 1025 } 1026 } else { 1027 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1028 } 1029 } 1030 } 1031 if (!has_candidates) { /* circular edge */ 1032 PetscInt k, ee = idxs[0],*tmarks; 1033 1034 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1035 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1036 for (k=ii[ee];k<ii[ee+1];k++) { 1037 PetscInt k2; 1038 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1039 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1040 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1041 } 1042 for (j=0;j<size;j++) { 1043 if (tmarks[idxs[j]] > 1) { 1044 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1045 newprimals[cum++] = idxs[j]; 1046 } 1047 } 1048 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1049 } 1050 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1051 } 1052 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1053 } 1054 ierr = PetscFree(extcols);CHKERRQ(ierr); 1055 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1056 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1057 if (fl2g) { 1058 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1059 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1060 for (i=0;i<nee;i++) { 1061 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1062 } 1063 ierr = PetscFree(eedges);CHKERRQ(ierr); 1064 } 1065 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1066 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1067 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1068 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1069 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1070 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1071 pcbddc->mat_graph->twodim = PETSC_FALSE; 1072 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1073 if (fl2g) { 1074 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1075 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1076 for (i=0;i<nee;i++) { 1077 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1078 } 1079 } else { 1080 eedges = alleedges; 1081 primals = allprimals; 1082 } 1083 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1084 1085 /* Mark again */ 1086 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1087 for (i=0;i<nee;i++) { 1088 PetscInt size,mark = i+1; 1089 1090 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1091 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1092 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1093 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1094 } 1095 if (print) { 1096 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1097 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1098 } 1099 1100 /* Recompute extended cols */ 1101 eerr = PETSC_FALSE; 1102 for (i=0;i<nee;i++) { 1103 PetscInt size; 1104 1105 cum = 0; 1106 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1107 if (!size && nedfieldlocal) continue; 1108 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1109 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1110 for (j=0;j<size;j++) { 1111 PetscInt k,ee = idxs[j]; 1112 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1113 } 1114 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1115 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1116 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1117 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1118 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1119 if (cum != size -1) { 1120 if (print) { 1121 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1122 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1123 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1124 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1125 } 1126 eerr = PETSC_TRUE; 1127 } 1128 } 1129 } 1130 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1131 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1132 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1133 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1134 /* an error should not occur at this point */ 1135 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1136 1137 /* Check the number of endpoints */ 1138 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1139 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1140 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1141 for (i=0;i<nee;i++) { 1142 PetscInt size, found = 0, gc[2]; 1143 1144 /* init with defaults */ 1145 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1146 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1147 if (!size && nedfieldlocal) continue; 1148 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1149 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1150 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1151 for (j=0;j<size;j++) { 1152 PetscInt k,ee = idxs[j]; 1153 for (k=ii[ee];k<ii[ee+1];k++) { 1154 PetscInt vv = jj[k]; 1155 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1156 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1157 corners[i*2+found++] = vv; 1158 } 1159 } 1160 } 1161 if (found != 2) { 1162 PetscInt e; 1163 if (fl2g) { 1164 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1165 } else { 1166 e = idxs[0]; 1167 } 1168 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1169 } 1170 1171 /* get primal dof index on this coarse edge */ 1172 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1173 if (gc[0] > gc[1]) { 1174 PetscInt swap = corners[2*i]; 1175 corners[2*i] = corners[2*i+1]; 1176 corners[2*i+1] = swap; 1177 } 1178 cedges[i] = idxs[size-1]; 1179 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1180 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1181 } 1182 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1183 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1184 1185 #if defined(PETSC_USE_DEBUG) 1186 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1187 not interfere with neighbouring coarse edges */ 1188 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1189 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1190 for (i=0;i<nv;i++) { 1191 PetscInt emax = 0,eemax = 0; 1192 1193 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1194 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1195 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1196 for (j=1;j<nee+1;j++) { 1197 if (emax < emarks[j]) { 1198 emax = emarks[j]; 1199 eemax = j; 1200 } 1201 } 1202 /* not relevant for edges */ 1203 if (!eemax) continue; 1204 1205 for (j=ii[i];j<ii[i+1];j++) { 1206 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1207 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1208 } 1209 } 1210 } 1211 ierr = PetscFree(emarks);CHKERRQ(ierr); 1212 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1213 #endif 1214 1215 /* Compute extended rows indices for edge blocks of the change of basis */ 1216 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1217 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1218 extmem *= maxsize; 1219 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1220 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1221 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1222 for (i=0;i<nv;i++) { 1223 PetscInt mark = 0,size,start; 1224 1225 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1226 for (j=ii[i];j<ii[i+1];j++) 1227 if (marks[jj[j]] && !mark) 1228 mark = marks[jj[j]]; 1229 1230 /* not relevant */ 1231 if (!mark) continue; 1232 1233 /* import extended row */ 1234 mark--; 1235 start = mark*extmem+extrowcum[mark]; 1236 size = ii[i+1]-ii[i]; 1237 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1238 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1239 extrowcum[mark] += size; 1240 } 1241 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1242 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1243 ierr = PetscFree(marks);CHKERRQ(ierr); 1244 1245 /* Compress extrows */ 1246 cum = 0; 1247 for (i=0;i<nee;i++) { 1248 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1249 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1250 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1251 cum = PetscMax(cum,size); 1252 } 1253 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1254 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1255 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1256 1257 /* Workspace for lapack inner calls and VecSetValues */ 1258 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1259 1260 /* Create change of basis matrix (preallocation can be improved) */ 1261 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1262 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1263 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1264 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1265 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1266 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1267 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1268 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1269 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1270 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1271 1272 /* Defaults to identity */ 1273 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1274 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1275 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1276 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1277 1278 /* Create discrete gradient for the coarser level if needed */ 1279 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1280 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1281 if (pcbddc->current_level < pcbddc->max_levels) { 1282 ISLocalToGlobalMapping cel2g,cvl2g; 1283 IS wis,gwis; 1284 PetscInt cnv,cne; 1285 1286 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1287 if (fl2g) { 1288 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1289 } else { 1290 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1291 pcbddc->nedclocal = wis; 1292 } 1293 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1294 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1295 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1296 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1297 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1298 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1299 1300 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1301 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1302 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1303 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1304 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1305 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1306 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1307 1308 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1309 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1310 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1311 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1312 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1313 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1314 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1315 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1316 } 1317 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1318 1319 #if defined(PRINT_GDET) 1320 inc = 0; 1321 lev = pcbddc->current_level; 1322 #endif 1323 1324 /* Insert values in the change of basis matrix */ 1325 for (i=0;i<nee;i++) { 1326 Mat Gins = NULL, GKins = NULL; 1327 IS cornersis = NULL; 1328 PetscScalar cvals[2]; 1329 1330 if (pcbddc->nedcG) { 1331 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1332 } 1333 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1334 if (Gins && GKins) { 1335 PetscScalar *data; 1336 const PetscInt *rows,*cols; 1337 PetscInt nrh,nch,nrc,ncc; 1338 1339 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1340 /* H1 */ 1341 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1342 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1343 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1344 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1345 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1346 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1347 /* complement */ 1348 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1349 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1350 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i); 1351 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc); 1352 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1353 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1354 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1355 1356 /* coarse discrete gradient */ 1357 if (pcbddc->nedcG) { 1358 PetscInt cols[2]; 1359 1360 cols[0] = 2*i; 1361 cols[1] = 2*i+1; 1362 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1363 } 1364 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1365 } 1366 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1367 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1368 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1369 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1370 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1371 } 1372 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1373 1374 /* Start assembling */ 1375 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1376 if (pcbddc->nedcG) { 1377 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1378 } 1379 1380 /* Free */ 1381 if (fl2g) { 1382 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1383 for (i=0;i<nee;i++) { 1384 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1385 } 1386 ierr = PetscFree(eedges);CHKERRQ(ierr); 1387 } 1388 1389 /* hack mat_graph with primal dofs on the coarse edges */ 1390 { 1391 PCBDDCGraph graph = pcbddc->mat_graph; 1392 PetscInt *oqueue = graph->queue; 1393 PetscInt *ocptr = graph->cptr; 1394 PetscInt ncc,*idxs; 1395 1396 /* find first primal edge */ 1397 if (pcbddc->nedclocal) { 1398 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1399 } else { 1400 if (fl2g) { 1401 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1402 } 1403 idxs = cedges; 1404 } 1405 cum = 0; 1406 while (cum < nee && cedges[cum] < 0) cum++; 1407 1408 /* adapt connected components */ 1409 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1410 graph->cptr[0] = 0; 1411 for (i=0,ncc=0;i<graph->ncc;i++) { 1412 PetscInt lc = ocptr[i+1]-ocptr[i]; 1413 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1414 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1415 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1416 ncc++; 1417 lc--; 1418 cum++; 1419 while (cum < nee && cedges[cum] < 0) cum++; 1420 } 1421 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1422 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1423 ncc++; 1424 } 1425 graph->ncc = ncc; 1426 if (pcbddc->nedclocal) { 1427 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1428 } 1429 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1430 } 1431 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1432 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1433 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1434 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1435 1436 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1437 ierr = PetscFree(extrow);CHKERRQ(ierr); 1438 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1439 ierr = PetscFree(corners);CHKERRQ(ierr); 1440 ierr = PetscFree(cedges);CHKERRQ(ierr); 1441 ierr = PetscFree(extrows);CHKERRQ(ierr); 1442 ierr = PetscFree(extcols);CHKERRQ(ierr); 1443 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1444 1445 /* Complete assembling */ 1446 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1447 if (pcbddc->nedcG) { 1448 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1449 #if 0 1450 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1451 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1452 #endif 1453 } 1454 1455 /* set change of basis */ 1456 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1457 ierr = MatDestroy(&T);CHKERRQ(ierr); 1458 1459 PetscFunctionReturn(0); 1460 } 1461 1462 /* the near-null space of BDDC carries information on quadrature weights, 1463 and these can be collinear -> so cheat with MatNullSpaceCreate 1464 and create a suitable set of basis vectors first */ 1465 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1466 { 1467 PetscErrorCode ierr; 1468 PetscInt i; 1469 1470 PetscFunctionBegin; 1471 for (i=0;i<nvecs;i++) { 1472 PetscInt first,last; 1473 1474 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1475 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1476 if (i>=first && i < last) { 1477 PetscScalar *data; 1478 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1479 if (!has_const) { 1480 data[i-first] = 1.; 1481 } else { 1482 data[2*i-first] = 1./PetscSqrtReal(2.); 1483 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1484 } 1485 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1486 } 1487 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1488 } 1489 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1490 for (i=0;i<nvecs;i++) { /* reset vectors */ 1491 PetscInt first,last; 1492 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1493 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1494 if (i>=first && i < last) { 1495 PetscScalar *data; 1496 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1497 if (!has_const) { 1498 data[i-first] = 0.; 1499 } else { 1500 data[2*i-first] = 0.; 1501 data[2*i-first+1] = 0.; 1502 } 1503 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1504 } 1505 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1506 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1507 } 1508 PetscFunctionReturn(0); 1509 } 1510 1511 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1512 { 1513 Mat loc_divudotp; 1514 Vec p,v,vins,quad_vec,*quad_vecs; 1515 ISLocalToGlobalMapping map; 1516 IS *faces,*edges; 1517 PetscScalar *vals; 1518 const PetscScalar *array; 1519 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1520 PetscMPIInt rank; 1521 PetscErrorCode ierr; 1522 1523 PetscFunctionBegin; 1524 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1525 if (graph->twodim) { 1526 lmaxneighs = 2; 1527 } else { 1528 lmaxneighs = 1; 1529 for (i=0;i<ne;i++) { 1530 const PetscInt *idxs; 1531 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1532 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1533 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1534 } 1535 lmaxneighs++; /* graph count does not include self */ 1536 } 1537 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1538 maxsize = 0; 1539 for (i=0;i<ne;i++) { 1540 PetscInt nn; 1541 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1542 maxsize = PetscMax(maxsize,nn); 1543 } 1544 for (i=0;i<nf;i++) { 1545 PetscInt nn; 1546 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1547 maxsize = PetscMax(maxsize,nn); 1548 } 1549 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1550 /* create vectors to hold quadrature weights */ 1551 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1552 if (!transpose) { 1553 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1554 } else { 1555 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1556 } 1557 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1558 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1559 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1560 for (i=0;i<maxneighs;i++) { 1561 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1562 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1563 } 1564 1565 /* compute local quad vec */ 1566 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1567 if (!transpose) { 1568 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1569 } else { 1570 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1571 } 1572 ierr = VecSet(p,1.);CHKERRQ(ierr); 1573 if (!transpose) { 1574 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1575 } else { 1576 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1577 } 1578 if (vl2l) { 1579 Mat lA; 1580 VecScatter sc; 1581 1582 ierr = MatISGetLocalMat(A,&lA);CHKERRQ(ierr); 1583 ierr = MatCreateVecs(lA,&vins,NULL);CHKERRQ(ierr); 1584 ierr = VecScatterCreate(v,vl2l,vins,NULL,&sc);CHKERRQ(ierr); 1585 ierr = VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1586 ierr = VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1587 ierr = VecScatterDestroy(&sc);CHKERRQ(ierr); 1588 } else { 1589 vins = v; 1590 } 1591 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1592 ierr = VecDestroy(&p);CHKERRQ(ierr); 1593 1594 /* insert in global quadrature vecs */ 1595 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1596 for (i=0;i<nf;i++) { 1597 const PetscInt *idxs; 1598 PetscInt idx,nn,j; 1599 1600 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1601 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1602 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1603 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1604 idx = -(idx+1); 1605 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1606 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1607 } 1608 for (i=0;i<ne;i++) { 1609 const PetscInt *idxs; 1610 PetscInt idx,nn,j; 1611 1612 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1613 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1614 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1615 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1616 idx = -(idx+1); 1617 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1618 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1619 } 1620 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1621 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1622 if (vl2l) { 1623 ierr = VecDestroy(&vins);CHKERRQ(ierr); 1624 } 1625 ierr = VecDestroy(&v);CHKERRQ(ierr); 1626 ierr = PetscFree(vals);CHKERRQ(ierr); 1627 1628 /* assemble near null space */ 1629 for (i=0;i<maxneighs;i++) { 1630 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1631 } 1632 for (i=0;i<maxneighs;i++) { 1633 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1634 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1635 } 1636 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1637 PetscFunctionReturn(0); 1638 } 1639 1640 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1641 { 1642 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1643 PetscErrorCode ierr; 1644 1645 PetscFunctionBegin; 1646 if (primalv) { 1647 if (pcbddc->user_primal_vertices_local) { 1648 IS list[2], newp; 1649 1650 list[0] = primalv; 1651 list[1] = pcbddc->user_primal_vertices_local; 1652 ierr = ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);CHKERRQ(ierr); 1653 ierr = ISSortRemoveDups(newp);CHKERRQ(ierr); 1654 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1655 pcbddc->user_primal_vertices_local = newp; 1656 } else { 1657 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1658 } 1659 } 1660 PetscFunctionReturn(0); 1661 } 1662 1663 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1664 { 1665 PetscErrorCode ierr; 1666 Vec local,global; 1667 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1668 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1669 PetscBool monolithic = PETSC_FALSE; 1670 1671 PetscFunctionBegin; 1672 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1673 ierr = PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);CHKERRQ(ierr); 1674 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1675 /* need to convert from global to local topology information and remove references to information in global ordering */ 1676 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1677 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1678 if (monolithic) goto boundary; 1679 1680 if (pcbddc->user_provided_isfordofs) { 1681 if (pcbddc->n_ISForDofs) { 1682 PetscInt i; 1683 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1684 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1685 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1686 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1687 } 1688 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1689 pcbddc->n_ISForDofs = 0; 1690 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1691 } 1692 } else { 1693 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1694 DM dm; 1695 1696 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1697 if (!dm) { 1698 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1699 } 1700 if (dm) { 1701 IS *fields; 1702 PetscInt nf,i; 1703 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1704 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1705 for (i=0;i<nf;i++) { 1706 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1707 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1708 } 1709 ierr = PetscFree(fields);CHKERRQ(ierr); 1710 pcbddc->n_ISForDofsLocal = nf; 1711 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1712 PetscContainer c; 1713 1714 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1715 if (c) { 1716 MatISLocalFields lf; 1717 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1718 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1719 } else { /* fallback, create the default fields if bs > 1 */ 1720 PetscInt i, n = matis->A->rmap->n; 1721 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1722 if (i > 1) { 1723 pcbddc->n_ISForDofsLocal = i; 1724 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1725 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1726 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1727 } 1728 } 1729 } 1730 } 1731 } else { 1732 PetscInt i; 1733 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1734 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1735 } 1736 } 1737 } 1738 1739 boundary: 1740 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1741 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1742 } else if (pcbddc->DirichletBoundariesLocal) { 1743 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1744 } 1745 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1746 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1747 } else if (pcbddc->NeumannBoundariesLocal) { 1748 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1749 } 1750 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1751 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1752 } 1753 ierr = VecDestroy(&global);CHKERRQ(ierr); 1754 ierr = VecDestroy(&local);CHKERRQ(ierr); 1755 /* detect local disconnected subdomains if requested (use matis->A) */ 1756 if (pcbddc->detect_disconnected) { 1757 IS primalv = NULL; 1758 PetscInt i; 1759 1760 for (i=0;i<pcbddc->n_local_subs;i++) { 1761 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1762 } 1763 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1764 ierr = PCBDDCDetectDisconnectedComponents(pc,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);CHKERRQ(ierr); 1765 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1766 ierr = ISDestroy(&primalv);CHKERRQ(ierr); 1767 } 1768 /* early stage corner detection */ 1769 { 1770 DM dm; 1771 1772 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1773 if (dm) { 1774 PetscBool isda; 1775 1776 ierr = PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);CHKERRQ(ierr); 1777 if (isda) { 1778 ISLocalToGlobalMapping l2l; 1779 IS corners; 1780 Mat lA; 1781 1782 ierr = DMDAGetElementsCornersIS(dm,&corners);CHKERRQ(ierr); 1783 ierr = MatISGetLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1784 ierr = MatGetLocalToGlobalMapping(lA,&l2l,NULL);CHKERRQ(ierr); 1785 ierr = MatISRestoreLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1786 if (l2l) { 1787 const PetscInt *idx; 1788 PetscInt bs,*idxout,n; 1789 1790 ierr = ISLocalToGlobalMappingGetBlockSize(l2l,&bs);CHKERRQ(ierr); 1791 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 1792 ierr = ISGetIndices(corners,&idx);CHKERRQ(ierr); 1793 ierr = PetscMalloc1(n,&idxout);CHKERRQ(ierr); 1794 ierr = ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);CHKERRQ(ierr); 1795 ierr = ISRestoreIndices(corners,&idx);CHKERRQ(ierr); 1796 ierr = DMDARestoreElementsCornersIS(dm,&corners);CHKERRQ(ierr); 1797 ierr = ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);CHKERRQ(ierr); 1798 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,corners);CHKERRQ(ierr); 1799 ierr = ISDestroy(&corners);CHKERRQ(ierr); 1800 } else { /* not from DMDA */ 1801 ierr = DMDARestoreElementsCornersIS(dm,&corners);CHKERRQ(ierr); 1802 } 1803 } 1804 } 1805 } 1806 PetscFunctionReturn(0); 1807 } 1808 1809 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1810 { 1811 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1812 PetscErrorCode ierr; 1813 IS nis; 1814 const PetscInt *idxs; 1815 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1816 PetscBool *ld; 1817 1818 PetscFunctionBegin; 1819 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1820 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 1821 if (mop == MPI_LAND) { 1822 /* init rootdata with true */ 1823 ld = (PetscBool*) matis->sf_rootdata; 1824 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1825 } else { 1826 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1827 } 1828 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1829 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1830 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1831 ld = (PetscBool*) matis->sf_leafdata; 1832 for (i=0;i<nd;i++) 1833 if (-1 < idxs[i] && idxs[i] < n) 1834 ld[idxs[i]] = PETSC_TRUE; 1835 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1836 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1837 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1838 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1839 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1840 if (mop == MPI_LAND) { 1841 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1842 } else { 1843 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1844 } 1845 for (i=0,nnd=0;i<n;i++) 1846 if (ld[i]) 1847 nidxs[nnd++] = i; 1848 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1849 ierr = ISDestroy(is);CHKERRQ(ierr); 1850 *is = nis; 1851 PetscFunctionReturn(0); 1852 } 1853 1854 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1855 { 1856 PC_IS *pcis = (PC_IS*)(pc->data); 1857 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1858 PetscErrorCode ierr; 1859 1860 PetscFunctionBegin; 1861 if (!pcbddc->benign_have_null) { 1862 PetscFunctionReturn(0); 1863 } 1864 if (pcbddc->ChangeOfBasisMatrix) { 1865 Vec swap; 1866 1867 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1868 swap = pcbddc->work_change; 1869 pcbddc->work_change = r; 1870 r = swap; 1871 } 1872 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1873 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1874 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1875 ierr = VecSet(z,0.);CHKERRQ(ierr); 1876 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1877 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1878 if (pcbddc->ChangeOfBasisMatrix) { 1879 pcbddc->work_change = r; 1880 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1881 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1882 } 1883 PetscFunctionReturn(0); 1884 } 1885 1886 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1887 { 1888 PCBDDCBenignMatMult_ctx ctx; 1889 PetscErrorCode ierr; 1890 PetscBool apply_right,apply_left,reset_x; 1891 1892 PetscFunctionBegin; 1893 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1894 if (transpose) { 1895 apply_right = ctx->apply_left; 1896 apply_left = ctx->apply_right; 1897 } else { 1898 apply_right = ctx->apply_right; 1899 apply_left = ctx->apply_left; 1900 } 1901 reset_x = PETSC_FALSE; 1902 if (apply_right) { 1903 const PetscScalar *ax; 1904 PetscInt nl,i; 1905 1906 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1907 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1908 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1909 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1910 for (i=0;i<ctx->benign_n;i++) { 1911 PetscScalar sum,val; 1912 const PetscInt *idxs; 1913 PetscInt nz,j; 1914 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1915 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1916 sum = 0.; 1917 if (ctx->apply_p0) { 1918 val = ctx->work[idxs[nz-1]]; 1919 for (j=0;j<nz-1;j++) { 1920 sum += ctx->work[idxs[j]]; 1921 ctx->work[idxs[j]] += val; 1922 } 1923 } else { 1924 for (j=0;j<nz-1;j++) { 1925 sum += ctx->work[idxs[j]]; 1926 } 1927 } 1928 ctx->work[idxs[nz-1]] -= sum; 1929 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1930 } 1931 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1932 reset_x = PETSC_TRUE; 1933 } 1934 if (transpose) { 1935 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1936 } else { 1937 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1938 } 1939 if (reset_x) { 1940 ierr = VecResetArray(x);CHKERRQ(ierr); 1941 } 1942 if (apply_left) { 1943 PetscScalar *ay; 1944 PetscInt i; 1945 1946 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1947 for (i=0;i<ctx->benign_n;i++) { 1948 PetscScalar sum,val; 1949 const PetscInt *idxs; 1950 PetscInt nz,j; 1951 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1952 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1953 val = -ay[idxs[nz-1]]; 1954 if (ctx->apply_p0) { 1955 sum = 0.; 1956 for (j=0;j<nz-1;j++) { 1957 sum += ay[idxs[j]]; 1958 ay[idxs[j]] += val; 1959 } 1960 ay[idxs[nz-1]] += sum; 1961 } else { 1962 for (j=0;j<nz-1;j++) { 1963 ay[idxs[j]] += val; 1964 } 1965 ay[idxs[nz-1]] = 0.; 1966 } 1967 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1968 } 1969 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1970 } 1971 PetscFunctionReturn(0); 1972 } 1973 1974 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1975 { 1976 PetscErrorCode ierr; 1977 1978 PetscFunctionBegin; 1979 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1980 PetscFunctionReturn(0); 1981 } 1982 1983 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1984 { 1985 PetscErrorCode ierr; 1986 1987 PetscFunctionBegin; 1988 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1989 PetscFunctionReturn(0); 1990 } 1991 1992 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1993 { 1994 PC_IS *pcis = (PC_IS*)pc->data; 1995 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1996 PCBDDCBenignMatMult_ctx ctx; 1997 PetscErrorCode ierr; 1998 1999 PetscFunctionBegin; 2000 if (!restore) { 2001 Mat A_IB,A_BI; 2002 PetscScalar *work; 2003 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2004 2005 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2006 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2007 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 2008 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 2009 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2010 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 2011 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 2012 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 2013 ierr = PetscNew(&ctx);CHKERRQ(ierr); 2014 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 2015 ctx->apply_left = PETSC_TRUE; 2016 ctx->apply_right = PETSC_FALSE; 2017 ctx->apply_p0 = PETSC_FALSE; 2018 ctx->benign_n = pcbddc->benign_n; 2019 if (reuse) { 2020 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2021 ctx->free = PETSC_FALSE; 2022 } else { /* TODO: could be optimized for successive solves */ 2023 ISLocalToGlobalMapping N_to_D; 2024 PetscInt i; 2025 2026 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 2027 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2028 for (i=0;i<pcbddc->benign_n;i++) { 2029 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2030 } 2031 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 2032 ctx->free = PETSC_TRUE; 2033 } 2034 ctx->A = pcis->A_IB; 2035 ctx->work = work; 2036 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 2037 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2038 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2039 pcis->A_IB = A_IB; 2040 2041 /* A_BI as A_IB^T */ 2042 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 2043 pcbddc->benign_original_mat = pcis->A_BI; 2044 pcis->A_BI = A_BI; 2045 } else { 2046 if (!pcbddc->benign_original_mat) { 2047 PetscFunctionReturn(0); 2048 } 2049 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 2050 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2051 pcis->A_IB = ctx->A; 2052 ctx->A = NULL; 2053 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2054 pcis->A_BI = pcbddc->benign_original_mat; 2055 pcbddc->benign_original_mat = NULL; 2056 if (ctx->free) { 2057 PetscInt i; 2058 for (i=0;i<ctx->benign_n;i++) { 2059 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2060 } 2061 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2062 } 2063 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 2064 ierr = PetscFree(ctx);CHKERRQ(ierr); 2065 } 2066 PetscFunctionReturn(0); 2067 } 2068 2069 /* used just in bddc debug mode */ 2070 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2071 { 2072 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2073 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2074 Mat An; 2075 PetscErrorCode ierr; 2076 2077 PetscFunctionBegin; 2078 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 2079 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 2080 if (is1) { 2081 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2082 ierr = MatDestroy(&An);CHKERRQ(ierr); 2083 } else { 2084 *B = An; 2085 } 2086 PetscFunctionReturn(0); 2087 } 2088 2089 /* TODO: add reuse flag */ 2090 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2091 { 2092 Mat Bt; 2093 PetscScalar *a,*bdata; 2094 const PetscInt *ii,*ij; 2095 PetscInt m,n,i,nnz,*bii,*bij; 2096 PetscBool flg_row; 2097 PetscErrorCode ierr; 2098 2099 PetscFunctionBegin; 2100 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2101 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2102 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2103 nnz = n; 2104 for (i=0;i<ii[n];i++) { 2105 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2106 } 2107 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2108 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2109 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2110 nnz = 0; 2111 bii[0] = 0; 2112 for (i=0;i<n;i++) { 2113 PetscInt j; 2114 for (j=ii[i];j<ii[i+1];j++) { 2115 PetscScalar entry = a[j]; 2116 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 2117 bij[nnz] = ij[j]; 2118 bdata[nnz] = entry; 2119 nnz++; 2120 } 2121 } 2122 bii[i+1] = nnz; 2123 } 2124 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2125 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2126 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2127 { 2128 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2129 b->free_a = PETSC_TRUE; 2130 b->free_ij = PETSC_TRUE; 2131 } 2132 *B = Bt; 2133 PetscFunctionReturn(0); 2134 } 2135 2136 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscInt *ncc, IS* cc[], IS* primalv) 2137 { 2138 Mat B = NULL; 2139 DM dm; 2140 IS is_dummy,*cc_n; 2141 ISLocalToGlobalMapping l2gmap_dummy; 2142 PCBDDCGraph graph; 2143 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2144 PetscInt i,n; 2145 PetscInt *xadj,*adjncy; 2146 PetscBool isplex = PETSC_FALSE; 2147 PetscErrorCode ierr; 2148 2149 PetscFunctionBegin; 2150 if (ncc) *ncc = 0; 2151 if (cc) *cc = NULL; 2152 if (primalv) *primalv = NULL; 2153 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2154 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2155 if (!dm) { 2156 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2157 } 2158 if (dm) { 2159 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2160 } 2161 if (isplex) { /* this code has been modified from plexpartition.c */ 2162 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2163 PetscInt *adj = NULL; 2164 IS cellNumbering; 2165 const PetscInt *cellNum; 2166 PetscBool useCone, useClosure; 2167 PetscSection section; 2168 PetscSegBuffer adjBuffer; 2169 PetscSF sfPoint; 2170 PetscErrorCode ierr; 2171 2172 PetscFunctionBegin; 2173 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2174 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2175 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2176 /* Build adjacency graph via a section/segbuffer */ 2177 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2178 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2179 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2180 /* Always use FVM adjacency to create partitioner graph */ 2181 ierr = DMPlexGetAdjacencyUseCone(dm, &useCone);CHKERRQ(ierr); 2182 ierr = DMPlexGetAdjacencyUseClosure(dm, &useClosure);CHKERRQ(ierr); 2183 ierr = DMPlexSetAdjacencyUseCone(dm, PETSC_TRUE);CHKERRQ(ierr); 2184 ierr = DMPlexSetAdjacencyUseClosure(dm, PETSC_FALSE);CHKERRQ(ierr); 2185 ierr = DMPlexGetCellNumbering(dm, &cellNumbering);CHKERRQ(ierr); 2186 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2187 for (n = 0, p = pStart; p < pEnd; p++) { 2188 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2189 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2190 adjSize = PETSC_DETERMINE; 2191 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2192 for (a = 0; a < adjSize; ++a) { 2193 const PetscInt point = adj[a]; 2194 if (pStart <= point && point < pEnd) { 2195 PetscInt *PETSC_RESTRICT pBuf; 2196 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2197 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2198 *pBuf = point; 2199 } 2200 } 2201 n++; 2202 } 2203 ierr = DMPlexSetAdjacencyUseCone(dm, useCone);CHKERRQ(ierr); 2204 ierr = DMPlexSetAdjacencyUseClosure(dm, useClosure);CHKERRQ(ierr); 2205 /* Derive CSR graph from section/segbuffer */ 2206 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2207 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2208 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2209 for (idx = 0, p = pStart; p < pEnd; p++) { 2210 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2211 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2212 } 2213 xadj[n] = size; 2214 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2215 /* Clean up */ 2216 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2217 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2218 ierr = PetscFree(adj);CHKERRQ(ierr); 2219 graph->xadj = xadj; 2220 graph->adjncy = adjncy; 2221 } else { 2222 Mat A; 2223 PetscBool filter = PETSC_FALSE, isseqaij, flg_row; 2224 2225 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2226 if (!A->rmap->N || !A->cmap->N) { 2227 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2228 PetscFunctionReturn(0); 2229 } 2230 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2231 if (!isseqaij && filter) { 2232 PetscBool isseqdense; 2233 2234 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2235 if (!isseqdense) { 2236 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2237 } else { /* TODO: rectangular case and LDA */ 2238 PetscScalar *array; 2239 PetscReal chop=1.e-6; 2240 2241 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2242 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2243 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2244 for (i=0;i<n;i++) { 2245 PetscInt j; 2246 for (j=i+1;j<n;j++) { 2247 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2248 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2249 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2250 } 2251 } 2252 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2253 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2254 } 2255 } else { 2256 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2257 B = A; 2258 } 2259 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2260 2261 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2262 if (filter) { 2263 PetscScalar *data; 2264 PetscInt j,cum; 2265 2266 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2267 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2268 cum = 0; 2269 for (i=0;i<n;i++) { 2270 PetscInt t; 2271 2272 for (j=xadj[i];j<xadj[i+1];j++) { 2273 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2274 continue; 2275 } 2276 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2277 } 2278 t = xadj_filtered[i]; 2279 xadj_filtered[i] = cum; 2280 cum += t; 2281 } 2282 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2283 graph->xadj = xadj_filtered; 2284 graph->adjncy = adjncy_filtered; 2285 } else { 2286 graph->xadj = xadj; 2287 graph->adjncy = adjncy; 2288 } 2289 } 2290 /* compute local connected components using PCBDDCGraph */ 2291 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2292 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2293 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2294 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2295 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2296 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2297 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2298 2299 /* partial clean up */ 2300 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2301 if (B) { 2302 PetscBool flg_row; 2303 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2304 ierr = MatDestroy(&B);CHKERRQ(ierr); 2305 } 2306 if (isplex) { 2307 ierr = PetscFree(xadj);CHKERRQ(ierr); 2308 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2309 } 2310 2311 /* get back data */ 2312 if (isplex) { 2313 if (ncc) *ncc = graph->ncc; 2314 if (cc || primalv) { 2315 Mat A; 2316 PetscBT btv,btvt; 2317 PetscSection subSection; 2318 PetscInt *ids,cum,cump,*cids,*pids; 2319 2320 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2321 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2322 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2323 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2324 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2325 2326 cids[0] = 0; 2327 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2328 PetscInt j; 2329 2330 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2331 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2332 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2333 2334 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2335 for (k = 0; k < 2*size; k += 2) { 2336 PetscInt s, p = closure[k], off, dof, cdof; 2337 2338 ierr = PetscSectionGetConstraintDof(subSection, p, &cdof);CHKERRQ(ierr); 2339 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2340 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2341 for (s = 0; s < dof-cdof; s++) { 2342 if (PetscBTLookupSet(btvt,off+s)) continue; 2343 if (!PetscBTLookup(btv,off+s)) { 2344 ids[cum++] = off+s; 2345 } else { /* cross-vertex */ 2346 pids[cump++] = off+s; 2347 } 2348 } 2349 } 2350 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2351 } 2352 cids[i+1] = cum; 2353 /* mark dofs as already assigned */ 2354 for (j = cids[i]; j < cids[i+1]; j++) { 2355 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2356 } 2357 } 2358 if (cc) { 2359 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2360 for (i = 0; i < graph->ncc; i++) { 2361 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2362 } 2363 *cc = cc_n; 2364 } 2365 if (primalv) { 2366 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2367 } 2368 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2369 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2370 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2371 } 2372 } else { 2373 if (ncc) *ncc = graph->ncc; 2374 if (cc) { 2375 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2376 for (i=0;i<graph->ncc;i++) { 2377 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2378 } 2379 *cc = cc_n; 2380 } 2381 } 2382 /* clean up graph */ 2383 graph->xadj = 0; 2384 graph->adjncy = 0; 2385 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2386 PetscFunctionReturn(0); 2387 } 2388 2389 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2390 { 2391 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2392 PC_IS* pcis = (PC_IS*)(pc->data); 2393 IS dirIS = NULL; 2394 PetscInt i; 2395 PetscErrorCode ierr; 2396 2397 PetscFunctionBegin; 2398 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2399 if (zerodiag) { 2400 Mat A; 2401 Vec vec3_N; 2402 PetscScalar *vals; 2403 const PetscInt *idxs; 2404 PetscInt nz,*count; 2405 2406 /* p0 */ 2407 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2408 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2409 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2410 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2411 for (i=0;i<nz;i++) vals[i] = 1.; 2412 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2413 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2414 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2415 /* v_I */ 2416 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2417 for (i=0;i<nz;i++) vals[i] = 0.; 2418 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2419 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2420 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2421 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2422 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2423 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2424 if (dirIS) { 2425 PetscInt n; 2426 2427 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2428 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2429 for (i=0;i<n;i++) vals[i] = 0.; 2430 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2431 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2432 } 2433 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2434 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2435 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2436 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2437 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2438 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2439 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2440 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2441 ierr = PetscFree(vals);CHKERRQ(ierr); 2442 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2443 2444 /* there should not be any pressure dofs lying on the interface */ 2445 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2446 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2447 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2448 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2449 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2450 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2451 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2452 ierr = PetscFree(count);CHKERRQ(ierr); 2453 } 2454 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2455 2456 /* check PCBDDCBenignGetOrSetP0 */ 2457 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2458 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2459 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2460 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2461 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2462 for (i=0;i<pcbddc->benign_n;i++) { 2463 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2464 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2465 } 2466 PetscFunctionReturn(0); 2467 } 2468 2469 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2470 { 2471 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2472 IS pressures,zerodiag,zerodiag_save,*zerodiag_subs; 2473 PetscInt nz,n; 2474 PetscInt *interior_dofs,n_interior_dofs,nneu; 2475 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2476 PetscErrorCode ierr; 2477 2478 PetscFunctionBegin; 2479 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2480 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2481 for (n=0;n<pcbddc->benign_n;n++) { 2482 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2483 } 2484 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2485 pcbddc->benign_n = 0; 2486 2487 /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line) 2488 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2489 Checks if all the pressure dofs in each subdomain have a zero diagonal 2490 If not, a change of basis on pressures is not needed 2491 since the local Schur complements are already SPD 2492 */ 2493 has_null_pressures = PETSC_TRUE; 2494 have_null = PETSC_TRUE; 2495 if (pcbddc->n_ISForDofsLocal) { 2496 IS iP = NULL; 2497 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2498 2499 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2500 ierr = PetscOptionsInt("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);CHKERRQ(ierr); 2501 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2502 if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p); 2503 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2504 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2505 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2506 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2507 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2508 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2509 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2510 if (iP) { 2511 IS newpressures; 2512 2513 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2514 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2515 pressures = newpressures; 2516 } 2517 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2518 if (!sorted) { 2519 ierr = ISSort(pressures);CHKERRQ(ierr); 2520 } 2521 } else { 2522 pressures = NULL; 2523 } 2524 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2525 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2526 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2527 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2528 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2529 if (!sorted) { 2530 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2531 } 2532 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2533 zerodiag_save = zerodiag; 2534 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2535 if (!nz) { 2536 if (n) have_null = PETSC_FALSE; 2537 has_null_pressures = PETSC_FALSE; 2538 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2539 } 2540 recompute_zerodiag = PETSC_FALSE; 2541 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2542 zerodiag_subs = NULL; 2543 pcbddc->benign_n = 0; 2544 n_interior_dofs = 0; 2545 interior_dofs = NULL; 2546 nneu = 0; 2547 if (pcbddc->NeumannBoundariesLocal) { 2548 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2549 } 2550 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2551 if (checkb) { /* need to compute interior nodes */ 2552 PetscInt n,i,j; 2553 PetscInt n_neigh,*neigh,*n_shared,**shared; 2554 PetscInt *iwork; 2555 2556 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2557 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2558 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2559 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2560 for (i=1;i<n_neigh;i++) 2561 for (j=0;j<n_shared[i];j++) 2562 iwork[shared[i][j]] += 1; 2563 for (i=0;i<n;i++) 2564 if (!iwork[i]) 2565 interior_dofs[n_interior_dofs++] = i; 2566 ierr = PetscFree(iwork);CHKERRQ(ierr); 2567 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2568 } 2569 if (has_null_pressures) { 2570 IS *subs; 2571 PetscInt nsubs,i,j,nl; 2572 const PetscInt *idxs; 2573 PetscScalar *array; 2574 Vec *work; 2575 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2576 2577 subs = pcbddc->local_subs; 2578 nsubs = pcbddc->n_local_subs; 2579 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2580 if (checkb) { 2581 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2582 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2583 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2584 /* work[0] = 1_p */ 2585 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2586 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2587 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2588 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2589 /* work[0] = 1_v */ 2590 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2591 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2592 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2593 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2594 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2595 } 2596 if (nsubs > 1) { 2597 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2598 for (i=0;i<nsubs;i++) { 2599 ISLocalToGlobalMapping l2g; 2600 IS t_zerodiag_subs; 2601 PetscInt nl; 2602 2603 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2604 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2605 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2606 if (nl) { 2607 PetscBool valid = PETSC_TRUE; 2608 2609 if (checkb) { 2610 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2611 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2612 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2613 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2614 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2615 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2616 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2617 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2618 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2619 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2620 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2621 for (j=0;j<n_interior_dofs;j++) { 2622 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2623 valid = PETSC_FALSE; 2624 break; 2625 } 2626 } 2627 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2628 } 2629 if (valid && nneu) { 2630 const PetscInt *idxs; 2631 PetscInt nzb; 2632 2633 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2634 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2635 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2636 if (nzb) valid = PETSC_FALSE; 2637 } 2638 if (valid && pressures) { 2639 IS t_pressure_subs; 2640 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2641 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2642 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2643 } 2644 if (valid) { 2645 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2646 pcbddc->benign_n++; 2647 } else { 2648 recompute_zerodiag = PETSC_TRUE; 2649 } 2650 } 2651 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2652 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2653 } 2654 } else { /* there's just one subdomain (or zero if they have not been detected */ 2655 PetscBool valid = PETSC_TRUE; 2656 2657 if (nneu) valid = PETSC_FALSE; 2658 if (valid && pressures) { 2659 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2660 } 2661 if (valid && checkb) { 2662 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2663 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2664 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2665 for (j=0;j<n_interior_dofs;j++) { 2666 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2667 valid = PETSC_FALSE; 2668 break; 2669 } 2670 } 2671 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2672 } 2673 if (valid) { 2674 pcbddc->benign_n = 1; 2675 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2676 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2677 zerodiag_subs[0] = zerodiag; 2678 } 2679 } 2680 if (checkb) { 2681 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2682 } 2683 } 2684 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2685 2686 if (!pcbddc->benign_n) { 2687 PetscInt n; 2688 2689 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2690 recompute_zerodiag = PETSC_FALSE; 2691 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2692 if (n) { 2693 has_null_pressures = PETSC_FALSE; 2694 have_null = PETSC_FALSE; 2695 } 2696 } 2697 2698 /* final check for null pressures */ 2699 if (zerodiag && pressures) { 2700 PetscInt nz,np; 2701 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2702 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2703 if (nz != np) have_null = PETSC_FALSE; 2704 } 2705 2706 if (recompute_zerodiag) { 2707 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2708 if (pcbddc->benign_n == 1) { 2709 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2710 zerodiag = zerodiag_subs[0]; 2711 } else { 2712 PetscInt i,nzn,*new_idxs; 2713 2714 nzn = 0; 2715 for (i=0;i<pcbddc->benign_n;i++) { 2716 PetscInt ns; 2717 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2718 nzn += ns; 2719 } 2720 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2721 nzn = 0; 2722 for (i=0;i<pcbddc->benign_n;i++) { 2723 PetscInt ns,*idxs; 2724 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2725 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2726 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2727 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2728 nzn += ns; 2729 } 2730 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2731 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2732 } 2733 have_null = PETSC_FALSE; 2734 } 2735 2736 /* Prepare matrix to compute no-net-flux */ 2737 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2738 Mat A,loc_divudotp; 2739 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2740 IS row,col,isused = NULL; 2741 PetscInt M,N,n,st,n_isused; 2742 2743 if (pressures) { 2744 isused = pressures; 2745 } else { 2746 isused = zerodiag_save; 2747 } 2748 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2749 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2750 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2751 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2752 n_isused = 0; 2753 if (isused) { 2754 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2755 } 2756 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2757 st = st-n_isused; 2758 if (n) { 2759 const PetscInt *gidxs; 2760 2761 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2762 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2763 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2764 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2765 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2766 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2767 } else { 2768 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2769 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2770 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2771 } 2772 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2773 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2774 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2775 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2776 ierr = ISDestroy(&row);CHKERRQ(ierr); 2777 ierr = ISDestroy(&col);CHKERRQ(ierr); 2778 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2779 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2780 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2781 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2782 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2783 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2784 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2785 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2786 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2787 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2788 } 2789 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2790 2791 /* change of basis and p0 dofs */ 2792 if (has_null_pressures) { 2793 IS zerodiagc; 2794 const PetscInt *idxs,*idxsc; 2795 PetscInt i,s,*nnz; 2796 2797 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2798 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2799 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2800 /* local change of basis for pressures */ 2801 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2802 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2803 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2804 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2805 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2806 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2807 for (i=0;i<pcbddc->benign_n;i++) { 2808 PetscInt nzs,j; 2809 2810 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2811 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2812 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2813 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2814 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2815 } 2816 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2817 ierr = PetscFree(nnz);CHKERRQ(ierr); 2818 /* set identity on velocities */ 2819 for (i=0;i<n-nz;i++) { 2820 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2821 } 2822 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2823 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2824 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2825 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2826 /* set change on pressures */ 2827 for (s=0;s<pcbddc->benign_n;s++) { 2828 PetscScalar *array; 2829 PetscInt nzs; 2830 2831 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2832 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2833 for (i=0;i<nzs-1;i++) { 2834 PetscScalar vals[2]; 2835 PetscInt cols[2]; 2836 2837 cols[0] = idxs[i]; 2838 cols[1] = idxs[nzs-1]; 2839 vals[0] = 1.; 2840 vals[1] = 1.; 2841 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2842 } 2843 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2844 for (i=0;i<nzs-1;i++) array[i] = -1.; 2845 array[nzs-1] = 1.; 2846 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2847 /* store local idxs for p0 */ 2848 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2849 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2850 ierr = PetscFree(array);CHKERRQ(ierr); 2851 } 2852 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2853 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2854 /* project if needed */ 2855 if (pcbddc->benign_change_explicit) { 2856 Mat M; 2857 2858 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2859 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2860 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2861 ierr = MatDestroy(&M);CHKERRQ(ierr); 2862 } 2863 /* store global idxs for p0 */ 2864 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2865 } 2866 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2867 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2868 2869 /* determines if the coarse solver will be singular or not */ 2870 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2871 /* determines if the problem has subdomains with 0 pressure block */ 2872 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2873 *zerodiaglocal = zerodiag; 2874 PetscFunctionReturn(0); 2875 } 2876 2877 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2878 { 2879 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2880 PetscScalar *array; 2881 PetscErrorCode ierr; 2882 2883 PetscFunctionBegin; 2884 if (!pcbddc->benign_sf) { 2885 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2886 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2887 } 2888 if (get) { 2889 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2890 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2891 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2892 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2893 } else { 2894 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2895 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2896 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2897 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2898 } 2899 PetscFunctionReturn(0); 2900 } 2901 2902 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2903 { 2904 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2905 PetscErrorCode ierr; 2906 2907 PetscFunctionBegin; 2908 /* TODO: add error checking 2909 - avoid nested pop (or push) calls. 2910 - cannot push before pop. 2911 - cannot call this if pcbddc->local_mat is NULL 2912 */ 2913 if (!pcbddc->benign_n) { 2914 PetscFunctionReturn(0); 2915 } 2916 if (pop) { 2917 if (pcbddc->benign_change_explicit) { 2918 IS is_p0; 2919 MatReuse reuse; 2920 2921 /* extract B_0 */ 2922 reuse = MAT_INITIAL_MATRIX; 2923 if (pcbddc->benign_B0) { 2924 reuse = MAT_REUSE_MATRIX; 2925 } 2926 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2927 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2928 /* remove rows and cols from local problem */ 2929 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2930 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2931 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2932 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2933 } else { 2934 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2935 PetscScalar *vals; 2936 PetscInt i,n,*idxs_ins; 2937 2938 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2939 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2940 if (!pcbddc->benign_B0) { 2941 PetscInt *nnz; 2942 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2943 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2944 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2945 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2946 for (i=0;i<pcbddc->benign_n;i++) { 2947 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2948 nnz[i] = n - nnz[i]; 2949 } 2950 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2951 ierr = PetscFree(nnz);CHKERRQ(ierr); 2952 } 2953 2954 for (i=0;i<pcbddc->benign_n;i++) { 2955 PetscScalar *array; 2956 PetscInt *idxs,j,nz,cum; 2957 2958 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2959 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2960 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2961 for (j=0;j<nz;j++) vals[j] = 1.; 2962 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2963 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2964 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2965 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2966 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2967 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2968 cum = 0; 2969 for (j=0;j<n;j++) { 2970 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2971 vals[cum] = array[j]; 2972 idxs_ins[cum] = j; 2973 cum++; 2974 } 2975 } 2976 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2977 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2978 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2979 } 2980 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2981 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2982 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2983 } 2984 } else { /* push */ 2985 if (pcbddc->benign_change_explicit) { 2986 PetscInt i; 2987 2988 for (i=0;i<pcbddc->benign_n;i++) { 2989 PetscScalar *B0_vals; 2990 PetscInt *B0_cols,B0_ncol; 2991 2992 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2993 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2994 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2995 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2996 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2997 } 2998 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2999 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3000 } else { 3001 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 3002 } 3003 } 3004 PetscFunctionReturn(0); 3005 } 3006 3007 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3008 { 3009 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3010 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3011 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3012 PetscBLASInt *B_iwork,*B_ifail; 3013 PetscScalar *work,lwork; 3014 PetscScalar *St,*S,*eigv; 3015 PetscScalar *Sarray,*Starray; 3016 PetscReal *eigs,thresh; 3017 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3018 PetscBool allocated_S_St; 3019 #if defined(PETSC_USE_COMPLEX) 3020 PetscReal *rwork; 3021 #endif 3022 PetscErrorCode ierr; 3023 3024 PetscFunctionBegin; 3025 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3026 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3027 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\nRerun with -sub_schurs_hermitian 1 -sub_schurs_posdef 1 if the problem is SPD",sub_schurs->is_hermitian,sub_schurs->is_posdef); 3028 3029 if (pcbddc->dbg_flag) { 3030 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3031 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3032 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 3033 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3034 } 3035 3036 if (pcbddc->dbg_flag) { 3037 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 3038 } 3039 3040 /* max size of subsets */ 3041 mss = 0; 3042 for (i=0;i<sub_schurs->n_subs;i++) { 3043 PetscInt subset_size; 3044 3045 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3046 mss = PetscMax(mss,subset_size); 3047 } 3048 3049 /* min/max and threshold */ 3050 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3051 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3052 nmax = PetscMax(nmin,nmax); 3053 allocated_S_St = PETSC_FALSE; 3054 if (nmin) { 3055 allocated_S_St = PETSC_TRUE; 3056 } 3057 3058 /* allocate lapack workspace */ 3059 cum = cum2 = 0; 3060 maxneigs = 0; 3061 for (i=0;i<sub_schurs->n_subs;i++) { 3062 PetscInt n,subset_size; 3063 3064 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3065 n = PetscMin(subset_size,nmax); 3066 cum += subset_size; 3067 cum2 += subset_size*n; 3068 maxneigs = PetscMax(maxneigs,n); 3069 } 3070 if (mss) { 3071 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 3072 PetscBLASInt B_itype = 1; 3073 PetscBLASInt B_N = mss; 3074 PetscReal zero = 0.0; 3075 PetscReal eps = 0.0; /* dlamch? */ 3076 3077 B_lwork = -1; 3078 S = NULL; 3079 St = NULL; 3080 eigs = NULL; 3081 eigv = NULL; 3082 B_iwork = NULL; 3083 B_ifail = NULL; 3084 #if defined(PETSC_USE_COMPLEX) 3085 rwork = NULL; 3086 #endif 3087 thresh = 1.0; 3088 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3089 #if defined(PETSC_USE_COMPLEX) 3090 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3091 #else 3092 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3093 #endif 3094 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3095 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3096 } else { 3097 /* TODO */ 3098 } 3099 } else { 3100 lwork = 0; 3101 } 3102 3103 nv = 0; 3104 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3105 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3106 } 3107 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3108 if (allocated_S_St) { 3109 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3110 } 3111 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3112 #if defined(PETSC_USE_COMPLEX) 3113 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3114 #endif 3115 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3116 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3117 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3118 nv+cum,&pcbddc->adaptive_constraints_idxs, 3119 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3120 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 3121 3122 maxneigs = 0; 3123 cum = cumarray = 0; 3124 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3125 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3126 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3127 const PetscInt *idxs; 3128 3129 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3130 for (cum=0;cum<nv;cum++) { 3131 pcbddc->adaptive_constraints_n[cum] = 1; 3132 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3133 pcbddc->adaptive_constraints_data[cum] = 1.0; 3134 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3135 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3136 } 3137 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3138 } 3139 3140 if (mss) { /* multilevel */ 3141 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3142 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3143 } 3144 3145 thresh = pcbddc->adaptive_threshold; 3146 for (i=0;i<sub_schurs->n_subs;i++) { 3147 const PetscInt *idxs; 3148 PetscReal upper,lower; 3149 PetscInt j,subset_size,eigs_start = 0; 3150 PetscBLASInt B_N; 3151 PetscBool same_data = PETSC_FALSE; 3152 3153 if (pcbddc->use_deluxe_scaling) { 3154 upper = PETSC_MAX_REAL; 3155 lower = thresh; 3156 } else { 3157 upper = 1./thresh; 3158 lower = 0.; 3159 } 3160 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3161 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3162 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3163 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3164 if (sub_schurs->is_hermitian) { 3165 PetscInt j,k; 3166 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 3167 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3168 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3169 } 3170 for (j=0;j<subset_size;j++) { 3171 for (k=j;k<subset_size;k++) { 3172 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3173 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3174 } 3175 } 3176 } else { 3177 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3178 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3179 } 3180 } else { 3181 S = Sarray + cumarray; 3182 St = Starray + cumarray; 3183 } 3184 /* see if we can save some work */ 3185 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3186 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 3187 } 3188 3189 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3190 B_neigs = 0; 3191 } else { 3192 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 3193 PetscBLASInt B_itype = 1; 3194 PetscBLASInt B_IL, B_IU; 3195 PetscReal eps = -1.0; /* dlamch? */ 3196 PetscInt nmin_s; 3197 PetscBool compute_range = PETSC_FALSE; 3198 3199 if (pcbddc->dbg_flag) { 3200 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]]); 3201 } 3202 3203 compute_range = PETSC_FALSE; 3204 if (thresh > 1.+PETSC_SMALL && !same_data) { 3205 compute_range = PETSC_TRUE; 3206 } 3207 3208 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3209 if (compute_range) { 3210 3211 /* ask for eigenvalues larger than thresh */ 3212 #if defined(PETSC_USE_COMPLEX) 3213 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3214 #else 3215 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3216 #endif 3217 } else if (!same_data) { 3218 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3219 B_IL = 1; 3220 #if defined(PETSC_USE_COMPLEX) 3221 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3222 #else 3223 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3224 #endif 3225 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3226 PetscInt k; 3227 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3228 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3229 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3230 nmin = nmax; 3231 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 3232 for (k=0;k<nmax;k++) { 3233 eigs[k] = 1./PETSC_SMALL; 3234 eigv[k*(subset_size+1)] = 1.0; 3235 } 3236 } 3237 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3238 if (B_ierr) { 3239 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3240 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3241 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3242 } 3243 3244 if (B_neigs > nmax) { 3245 if (pcbddc->dbg_flag) { 3246 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 3247 } 3248 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 3249 B_neigs = nmax; 3250 } 3251 3252 nmin_s = PetscMin(nmin,B_N); 3253 if (B_neigs < nmin_s) { 3254 PetscBLASInt B_neigs2; 3255 3256 if (pcbddc->use_deluxe_scaling) { 3257 B_IL = B_N - nmin_s + 1; 3258 B_IU = B_N - B_neigs; 3259 } else { 3260 B_IL = B_neigs + 1; 3261 B_IU = nmin_s; 3262 } 3263 if (pcbddc->dbg_flag) { 3264 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 3265 } 3266 if (sub_schurs->is_hermitian) { 3267 PetscInt j,k; 3268 for (j=0;j<subset_size;j++) { 3269 for (k=j;k<subset_size;k++) { 3270 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3271 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3272 } 3273 } 3274 } else { 3275 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3276 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3277 } 3278 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3279 #if defined(PETSC_USE_COMPLEX) 3280 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3281 #else 3282 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3283 #endif 3284 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3285 B_neigs += B_neigs2; 3286 } 3287 if (B_ierr) { 3288 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3289 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3290 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3291 } 3292 if (pcbddc->dbg_flag) { 3293 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3294 for (j=0;j<B_neigs;j++) { 3295 if (eigs[j] == 0.0) { 3296 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3297 } else { 3298 if (pcbddc->use_deluxe_scaling) { 3299 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3300 } else { 3301 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3302 } 3303 } 3304 } 3305 } 3306 } else { 3307 /* TODO */ 3308 } 3309 } 3310 /* change the basis back to the original one */ 3311 if (sub_schurs->change) { 3312 Mat change,phi,phit; 3313 3314 if (pcbddc->dbg_flag > 2) { 3315 PetscInt ii; 3316 for (ii=0;ii<B_neigs;ii++) { 3317 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3318 for (j=0;j<B_N;j++) { 3319 #if defined(PETSC_USE_COMPLEX) 3320 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3321 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3322 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3323 #else 3324 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3325 #endif 3326 } 3327 } 3328 } 3329 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3330 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3331 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3332 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3333 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3334 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3335 } 3336 maxneigs = PetscMax(B_neigs,maxneigs); 3337 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3338 if (B_neigs) { 3339 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3340 3341 if (pcbddc->dbg_flag > 1) { 3342 PetscInt ii; 3343 for (ii=0;ii<B_neigs;ii++) { 3344 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3345 for (j=0;j<B_N;j++) { 3346 #if defined(PETSC_USE_COMPLEX) 3347 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3348 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3349 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3350 #else 3351 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3352 #endif 3353 } 3354 } 3355 } 3356 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3357 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3358 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3359 cum++; 3360 } 3361 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3362 /* shift for next computation */ 3363 cumarray += subset_size*subset_size; 3364 } 3365 if (pcbddc->dbg_flag) { 3366 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3367 } 3368 3369 if (mss) { 3370 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3371 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3372 /* destroy matrices (junk) */ 3373 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3374 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3375 } 3376 if (allocated_S_St) { 3377 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3378 } 3379 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3380 #if defined(PETSC_USE_COMPLEX) 3381 ierr = PetscFree(rwork);CHKERRQ(ierr); 3382 #endif 3383 if (pcbddc->dbg_flag) { 3384 PetscInt maxneigs_r; 3385 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3386 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3387 } 3388 PetscFunctionReturn(0); 3389 } 3390 3391 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3392 { 3393 PetscScalar *coarse_submat_vals; 3394 PetscErrorCode ierr; 3395 3396 PetscFunctionBegin; 3397 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3398 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3399 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3400 3401 /* Setup local neumann solver ksp_R */ 3402 /* PCBDDCSetUpLocalScatters should be called first! */ 3403 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3404 3405 /* 3406 Setup local correction and local part of coarse basis. 3407 Gives back the dense local part of the coarse matrix in column major ordering 3408 */ 3409 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3410 3411 /* Compute total number of coarse nodes and setup coarse solver */ 3412 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3413 3414 /* free */ 3415 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3416 PetscFunctionReturn(0); 3417 } 3418 3419 PetscErrorCode PCBDDCResetCustomization(PC pc) 3420 { 3421 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3422 PetscErrorCode ierr; 3423 3424 PetscFunctionBegin; 3425 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3426 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3427 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3428 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3429 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3430 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3431 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3432 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3433 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3434 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3435 PetscFunctionReturn(0); 3436 } 3437 3438 PetscErrorCode PCBDDCResetTopography(PC pc) 3439 { 3440 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3441 PetscInt i; 3442 PetscErrorCode ierr; 3443 3444 PetscFunctionBegin; 3445 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3446 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3447 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3448 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3449 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3450 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3451 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3452 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3453 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3454 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3455 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3456 for (i=0;i<pcbddc->n_local_subs;i++) { 3457 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3458 } 3459 pcbddc->n_local_subs = 0; 3460 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3461 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3462 pcbddc->graphanalyzed = PETSC_FALSE; 3463 pcbddc->recompute_topography = PETSC_TRUE; 3464 PetscFunctionReturn(0); 3465 } 3466 3467 PetscErrorCode PCBDDCResetSolvers(PC pc) 3468 { 3469 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3470 PetscErrorCode ierr; 3471 3472 PetscFunctionBegin; 3473 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3474 if (pcbddc->coarse_phi_B) { 3475 PetscScalar *array; 3476 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3477 ierr = PetscFree(array);CHKERRQ(ierr); 3478 } 3479 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3480 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3481 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3482 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3483 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3484 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3485 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3486 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3487 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3488 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3489 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3490 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3491 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3492 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3493 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3494 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3495 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3496 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3497 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3498 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3499 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3500 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3501 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3502 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3503 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3504 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3505 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3506 if (pcbddc->benign_zerodiag_subs) { 3507 PetscInt i; 3508 for (i=0;i<pcbddc->benign_n;i++) { 3509 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3510 } 3511 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3512 } 3513 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3514 PetscFunctionReturn(0); 3515 } 3516 3517 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3518 { 3519 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3520 PC_IS *pcis = (PC_IS*)pc->data; 3521 VecType impVecType; 3522 PetscInt n_constraints,n_R,old_size; 3523 PetscErrorCode ierr; 3524 3525 PetscFunctionBegin; 3526 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3527 n_R = pcis->n - pcbddc->n_vertices; 3528 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3529 /* local work vectors (try to avoid unneeded work)*/ 3530 /* R nodes */ 3531 old_size = -1; 3532 if (pcbddc->vec1_R) { 3533 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3534 } 3535 if (n_R != old_size) { 3536 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3537 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3538 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3539 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3540 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3541 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3542 } 3543 /* local primal dofs */ 3544 old_size = -1; 3545 if (pcbddc->vec1_P) { 3546 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3547 } 3548 if (pcbddc->local_primal_size != old_size) { 3549 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3550 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3551 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3552 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3553 } 3554 /* local explicit constraints */ 3555 old_size = -1; 3556 if (pcbddc->vec1_C) { 3557 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3558 } 3559 if (n_constraints && n_constraints != old_size) { 3560 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3561 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3562 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3563 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3564 } 3565 PetscFunctionReturn(0); 3566 } 3567 3568 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3569 { 3570 PetscErrorCode ierr; 3571 /* pointers to pcis and pcbddc */ 3572 PC_IS* pcis = (PC_IS*)pc->data; 3573 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3574 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3575 /* submatrices of local problem */ 3576 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3577 /* submatrices of local coarse problem */ 3578 Mat S_VV,S_CV,S_VC,S_CC; 3579 /* working matrices */ 3580 Mat C_CR; 3581 /* additional working stuff */ 3582 PC pc_R; 3583 Mat F,Brhs = NULL; 3584 Vec dummy_vec; 3585 PetscBool isLU,isCHOL,isILU,need_benign_correction,sparserhs; 3586 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3587 PetscScalar *work; 3588 PetscInt *idx_V_B; 3589 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3590 PetscInt i,n_R,n_D,n_B; 3591 3592 /* some shortcuts to scalars */ 3593 PetscScalar one=1.0,m_one=-1.0; 3594 3595 PetscFunctionBegin; 3596 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3597 3598 /* Set Non-overlapping dimensions */ 3599 n_vertices = pcbddc->n_vertices; 3600 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3601 n_B = pcis->n_B; 3602 n_D = pcis->n - n_B; 3603 n_R = pcis->n - n_vertices; 3604 3605 /* vertices in boundary numbering */ 3606 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3607 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3608 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3609 3610 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3611 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3612 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3613 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3614 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3615 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3616 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3617 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3618 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3619 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3620 3621 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3622 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3623 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3624 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3625 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3626 lda_rhs = n_R; 3627 need_benign_correction = PETSC_FALSE; 3628 if (isLU || isILU || isCHOL) { 3629 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3630 } else if (sub_schurs && sub_schurs->reuse_solver) { 3631 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3632 MatFactorType type; 3633 3634 F = reuse_solver->F; 3635 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3636 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3637 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3638 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3639 } else { 3640 F = NULL; 3641 } 3642 3643 /* determine if we can use a sparse right-hand side */ 3644 sparserhs = PETSC_FALSE; 3645 if (F) { 3646 MatSolverType solver; 3647 3648 ierr = MatFactorGetSolverType(F,&solver);CHKERRQ(ierr); 3649 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3650 } 3651 3652 /* allocate workspace */ 3653 n = 0; 3654 if (n_constraints) { 3655 n += lda_rhs*n_constraints; 3656 } 3657 if (n_vertices) { 3658 n = PetscMax(2*lda_rhs*n_vertices,n); 3659 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3660 } 3661 if (!pcbddc->symmetric_primal) { 3662 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3663 } 3664 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3665 3666 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3667 dummy_vec = NULL; 3668 if (need_benign_correction && lda_rhs != n_R && F) { 3669 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3670 } 3671 3672 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3673 if (n_constraints) { 3674 Mat M1,M2,M3,C_B; 3675 IS is_aux; 3676 PetscScalar *array,*array2; 3677 3678 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3679 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3680 3681 /* Extract constraints on R nodes: C_{CR} */ 3682 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3683 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3684 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3685 3686 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3687 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3688 if (!sparserhs) { 3689 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3690 for (i=0;i<n_constraints;i++) { 3691 const PetscScalar *row_cmat_values; 3692 const PetscInt *row_cmat_indices; 3693 PetscInt size_of_constraint,j; 3694 3695 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3696 for (j=0;j<size_of_constraint;j++) { 3697 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3698 } 3699 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3700 } 3701 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 3702 } else { 3703 Mat tC_CR; 3704 3705 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3706 if (lda_rhs != n_R) { 3707 PetscScalar *aa; 3708 PetscInt r,*ii,*jj; 3709 PetscBool done; 3710 3711 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3712 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 3713 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 3714 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 3715 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3716 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 3717 } else { 3718 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 3719 tC_CR = C_CR; 3720 } 3721 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 3722 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 3723 } 3724 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3725 if (F) { 3726 if (need_benign_correction) { 3727 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3728 3729 /* rhs is already zero on interior dofs, no need to change the rhs */ 3730 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3731 } 3732 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 3733 if (need_benign_correction) { 3734 PetscScalar *marr; 3735 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3736 3737 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3738 if (lda_rhs != n_R) { 3739 for (i=0;i<n_constraints;i++) { 3740 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3741 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3742 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3743 } 3744 } else { 3745 for (i=0;i<n_constraints;i++) { 3746 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3747 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3748 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3749 } 3750 } 3751 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3752 } 3753 } else { 3754 PetscScalar *marr; 3755 3756 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3757 for (i=0;i<n_constraints;i++) { 3758 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3759 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3760 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3761 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3762 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3763 } 3764 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3765 } 3766 if (sparserhs) { 3767 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3768 } 3769 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 3770 if (!pcbddc->switch_static) { 3771 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3772 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3773 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3774 for (i=0;i<n_constraints;i++) { 3775 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3776 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3777 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3778 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3779 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3780 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3781 } 3782 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3783 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3784 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3785 } else { 3786 if (lda_rhs != n_R) { 3787 IS dummy; 3788 3789 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3790 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3791 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3792 } else { 3793 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3794 pcbddc->local_auxmat2 = local_auxmat2_R; 3795 } 3796 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3797 } 3798 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3799 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3800 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3801 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3802 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3803 if (isCHOL) { 3804 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3805 } else { 3806 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3807 } 3808 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3809 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3810 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3811 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3812 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3813 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3814 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3815 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3816 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3817 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3818 } 3819 3820 /* Get submatrices from subdomain matrix */ 3821 if (n_vertices) { 3822 IS is_aux; 3823 PetscBool isseqaij; 3824 3825 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3826 IS tis; 3827 3828 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3829 ierr = ISSort(tis);CHKERRQ(ierr); 3830 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3831 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3832 } else { 3833 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3834 } 3835 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3836 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3837 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3838 if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */ 3839 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3840 } 3841 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3842 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3843 } 3844 3845 /* Matrix of coarse basis functions (local) */ 3846 if (pcbddc->coarse_phi_B) { 3847 PetscInt on_B,on_primal,on_D=n_D; 3848 if (pcbddc->coarse_phi_D) { 3849 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3850 } 3851 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3852 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3853 PetscScalar *marray; 3854 3855 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3856 ierr = PetscFree(marray);CHKERRQ(ierr); 3857 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3858 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3859 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3860 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3861 } 3862 } 3863 3864 if (!pcbddc->coarse_phi_B) { 3865 PetscScalar *marr; 3866 3867 /* memory size */ 3868 n = n_B*pcbddc->local_primal_size; 3869 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 3870 if (!pcbddc->symmetric_primal) n *= 2; 3871 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 3872 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3873 marr += n_B*pcbddc->local_primal_size; 3874 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3875 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3876 marr += n_D*pcbddc->local_primal_size; 3877 } 3878 if (!pcbddc->symmetric_primal) { 3879 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3880 marr += n_B*pcbddc->local_primal_size; 3881 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3882 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3883 } 3884 } else { 3885 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3886 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3887 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3888 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3889 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3890 } 3891 } 3892 } 3893 3894 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3895 p0_lidx_I = NULL; 3896 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3897 const PetscInt *idxs; 3898 3899 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3900 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3901 for (i=0;i<pcbddc->benign_n;i++) { 3902 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3903 } 3904 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3905 } 3906 3907 /* vertices */ 3908 if (n_vertices) { 3909 PetscBool restoreavr = PETSC_FALSE; 3910 3911 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3912 3913 if (n_R) { 3914 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3915 PetscBLASInt B_N,B_one = 1; 3916 PetscScalar *x,*y; 3917 3918 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3919 if (need_benign_correction) { 3920 ISLocalToGlobalMapping RtoN; 3921 IS is_p0; 3922 PetscInt *idxs_p0,n; 3923 3924 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3925 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3926 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3927 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3928 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3929 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3930 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3931 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3932 } 3933 3934 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3935 if (!sparserhs || need_benign_correction) { 3936 if (lda_rhs == n_R) { 3937 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3938 } else { 3939 PetscScalar *av,*array; 3940 const PetscInt *xadj,*adjncy; 3941 PetscInt n; 3942 PetscBool flg_row; 3943 3944 array = work+lda_rhs*n_vertices; 3945 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3946 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3947 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3948 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3949 for (i=0;i<n;i++) { 3950 PetscInt j; 3951 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3952 } 3953 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3954 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3955 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3956 } 3957 if (need_benign_correction) { 3958 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3959 PetscScalar *marr; 3960 3961 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3962 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3963 3964 | 0 0 0 | (V) 3965 L = | 0 0 -1 | (P-p0) 3966 | 0 0 -1 | (p0) 3967 3968 */ 3969 for (i=0;i<reuse_solver->benign_n;i++) { 3970 const PetscScalar *vals; 3971 const PetscInt *idxs,*idxs_zero; 3972 PetscInt n,j,nz; 3973 3974 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3975 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3976 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3977 for (j=0;j<n;j++) { 3978 PetscScalar val = vals[j]; 3979 PetscInt k,col = idxs[j]; 3980 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3981 } 3982 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3983 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3984 } 3985 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3986 } 3987 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 3988 Brhs = A_RV; 3989 } else { 3990 Mat tA_RVT,A_RVT; 3991 3992 if (!pcbddc->symmetric_primal) { 3993 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 3994 } else { 3995 restoreavr = PETSC_TRUE; 3996 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 3997 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 3998 A_RVT = A_VR; 3999 } 4000 if (lda_rhs != n_R) { 4001 PetscScalar *aa; 4002 PetscInt r,*ii,*jj; 4003 PetscBool done; 4004 4005 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4006 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4007 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 4008 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 4009 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4010 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4011 } else { 4012 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 4013 tA_RVT = A_RVT; 4014 } 4015 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 4016 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 4017 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 4018 } 4019 if (F) { 4020 /* need to correct the rhs */ 4021 if (need_benign_correction) { 4022 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4023 PetscScalar *marr; 4024 4025 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 4026 if (lda_rhs != n_R) { 4027 for (i=0;i<n_vertices;i++) { 4028 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4029 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4030 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4031 } 4032 } else { 4033 for (i=0;i<n_vertices;i++) { 4034 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4035 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4036 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4037 } 4038 } 4039 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 4040 } 4041 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 4042 if (restoreavr) { 4043 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4044 } 4045 /* need to correct the solution */ 4046 if (need_benign_correction) { 4047 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4048 PetscScalar *marr; 4049 4050 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4051 if (lda_rhs != n_R) { 4052 for (i=0;i<n_vertices;i++) { 4053 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4054 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4055 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4056 } 4057 } else { 4058 for (i=0;i<n_vertices;i++) { 4059 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4060 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4061 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4062 } 4063 } 4064 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4065 } 4066 } else { 4067 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 4068 for (i=0;i<n_vertices;i++) { 4069 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 4070 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 4071 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4072 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4073 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4074 } 4075 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 4076 } 4077 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4078 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4079 /* S_VV and S_CV */ 4080 if (n_constraints) { 4081 Mat B; 4082 4083 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4084 for (i=0;i<n_vertices;i++) { 4085 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4086 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 4087 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4088 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4089 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4090 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4091 } 4092 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4093 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 4094 ierr = MatDestroy(&B);CHKERRQ(ierr); 4095 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4096 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4097 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 4098 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 4099 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4100 ierr = MatDestroy(&B);CHKERRQ(ierr); 4101 } 4102 if (lda_rhs != n_R) { 4103 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4104 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4105 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 4106 } 4107 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 4108 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4109 if (need_benign_correction) { 4110 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4111 PetscScalar *marr,*sums; 4112 4113 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 4114 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 4115 for (i=0;i<reuse_solver->benign_n;i++) { 4116 const PetscScalar *vals; 4117 const PetscInt *idxs,*idxs_zero; 4118 PetscInt n,j,nz; 4119 4120 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4121 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4122 for (j=0;j<n_vertices;j++) { 4123 PetscInt k; 4124 sums[j] = 0.; 4125 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4126 } 4127 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4128 for (j=0;j<n;j++) { 4129 PetscScalar val = vals[j]; 4130 PetscInt k; 4131 for (k=0;k<n_vertices;k++) { 4132 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4133 } 4134 } 4135 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4136 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4137 } 4138 ierr = PetscFree(sums);CHKERRQ(ierr); 4139 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 4140 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 4141 } 4142 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4143 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 4144 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 4145 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 4146 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4147 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 4148 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 4149 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4150 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4151 } else { 4152 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4153 } 4154 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4155 4156 /* coarse basis functions */ 4157 for (i=0;i<n_vertices;i++) { 4158 PetscScalar *y; 4159 4160 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4161 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4162 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4163 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4164 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4165 y[n_B*i+idx_V_B[i]] = 1.0; 4166 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4167 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4168 4169 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4170 PetscInt j; 4171 4172 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4173 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4174 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4175 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4176 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4177 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4178 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4179 } 4180 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4181 } 4182 /* if n_R == 0 the object is not destroyed */ 4183 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4184 } 4185 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4186 4187 if (n_constraints) { 4188 Mat B; 4189 4190 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4191 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4192 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4193 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4194 if (n_vertices) { 4195 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4196 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4197 } else { 4198 Mat S_VCt; 4199 4200 if (lda_rhs != n_R) { 4201 ierr = MatDestroy(&B);CHKERRQ(ierr); 4202 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4203 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4204 } 4205 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4206 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4207 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4208 } 4209 } 4210 ierr = MatDestroy(&B);CHKERRQ(ierr); 4211 /* coarse basis functions */ 4212 for (i=0;i<n_constraints;i++) { 4213 PetscScalar *y; 4214 4215 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4216 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4217 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 4218 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4219 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4220 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4221 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4222 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4223 PetscInt j; 4224 4225 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4226 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4227 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4228 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4229 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4230 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4231 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4232 } 4233 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4234 } 4235 } 4236 if (n_constraints) { 4237 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4238 } 4239 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4240 4241 /* coarse matrix entries relative to B_0 */ 4242 if (pcbddc->benign_n) { 4243 Mat B0_B,B0_BPHI; 4244 IS is_dummy; 4245 PetscScalar *data; 4246 PetscInt j; 4247 4248 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4249 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4250 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4251 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4252 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4253 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 4254 for (j=0;j<pcbddc->benign_n;j++) { 4255 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4256 for (i=0;i<pcbddc->local_primal_size;i++) { 4257 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4258 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4259 } 4260 } 4261 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 4262 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4263 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4264 } 4265 4266 /* compute other basis functions for non-symmetric problems */ 4267 if (!pcbddc->symmetric_primal) { 4268 Mat B_V=NULL,B_C=NULL; 4269 PetscScalar *marray; 4270 4271 if (n_constraints) { 4272 Mat S_CCT,C_CRT; 4273 4274 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4275 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4276 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4277 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4278 if (n_vertices) { 4279 Mat S_VCT; 4280 4281 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4282 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4283 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4284 } 4285 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4286 } else { 4287 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4288 } 4289 if (n_vertices && n_R) { 4290 PetscScalar *av,*marray; 4291 const PetscInt *xadj,*adjncy; 4292 PetscInt n; 4293 PetscBool flg_row; 4294 4295 /* B_V = B_V - A_VR^T */ 4296 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4297 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4298 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4299 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4300 for (i=0;i<n;i++) { 4301 PetscInt j; 4302 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4303 } 4304 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4305 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4306 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4307 } 4308 4309 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4310 if (n_vertices) { 4311 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4312 for (i=0;i<n_vertices;i++) { 4313 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4314 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4315 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4316 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4317 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4318 } 4319 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4320 } 4321 if (B_C) { 4322 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4323 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4324 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4325 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4326 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4327 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4328 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4329 } 4330 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4331 } 4332 /* coarse basis functions */ 4333 for (i=0;i<pcbddc->local_primal_size;i++) { 4334 PetscScalar *y; 4335 4336 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4337 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4338 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4339 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4340 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4341 if (i<n_vertices) { 4342 y[n_B*i+idx_V_B[i]] = 1.0; 4343 } 4344 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4345 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4346 4347 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4348 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4349 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4350 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4351 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4352 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4353 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4354 } 4355 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4356 } 4357 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4358 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4359 } 4360 4361 /* free memory */ 4362 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4363 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4364 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4365 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4366 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4367 ierr = PetscFree(work);CHKERRQ(ierr); 4368 if (n_vertices) { 4369 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4370 } 4371 if (n_constraints) { 4372 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4373 } 4374 /* Checking coarse_sub_mat and coarse basis functios */ 4375 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4376 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4377 if (pcbddc->dbg_flag) { 4378 Mat coarse_sub_mat; 4379 Mat AUXMAT,TM1,TM2,TM3,TM4; 4380 Mat coarse_phi_D,coarse_phi_B; 4381 Mat coarse_psi_D,coarse_psi_B; 4382 Mat A_II,A_BB,A_IB,A_BI; 4383 Mat C_B,CPHI; 4384 IS is_dummy; 4385 Vec mones; 4386 MatType checkmattype=MATSEQAIJ; 4387 PetscReal real_value; 4388 4389 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4390 Mat A; 4391 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4392 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4393 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4394 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4395 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4396 ierr = MatDestroy(&A);CHKERRQ(ierr); 4397 } else { 4398 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4399 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4400 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4401 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4402 } 4403 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4404 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4405 if (!pcbddc->symmetric_primal) { 4406 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4407 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4408 } 4409 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4410 4411 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4412 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4413 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4414 if (!pcbddc->symmetric_primal) { 4415 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4416 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4417 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4418 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4419 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4420 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4421 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4422 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4423 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4424 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4425 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4426 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4427 } else { 4428 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4429 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4430 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4431 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4432 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4433 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4434 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4435 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4436 } 4437 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4438 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4439 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4440 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4441 if (pcbddc->benign_n) { 4442 Mat B0_B,B0_BPHI; 4443 PetscScalar *data,*data2; 4444 PetscInt j; 4445 4446 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4447 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4448 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4449 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4450 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4451 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4452 for (j=0;j<pcbddc->benign_n;j++) { 4453 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4454 for (i=0;i<pcbddc->local_primal_size;i++) { 4455 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4456 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4457 } 4458 } 4459 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4460 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4461 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4462 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4463 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4464 } 4465 #if 0 4466 { 4467 PetscViewer viewer; 4468 char filename[256]; 4469 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4470 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4471 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4472 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4473 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4474 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4475 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4476 if (pcbddc->coarse_phi_B) { 4477 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4478 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4479 } 4480 if (pcbddc->coarse_phi_D) { 4481 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4482 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4483 } 4484 if (pcbddc->coarse_psi_B) { 4485 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4486 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4487 } 4488 if (pcbddc->coarse_psi_D) { 4489 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4490 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4491 } 4492 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4493 } 4494 #endif 4495 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4496 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4497 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4498 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4499 4500 /* check constraints */ 4501 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4502 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4503 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4504 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4505 } else { 4506 PetscScalar *data; 4507 Mat tmat; 4508 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4509 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4510 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4511 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4512 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4513 } 4514 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4515 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4516 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4517 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4518 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4519 if (!pcbddc->symmetric_primal) { 4520 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4521 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4522 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4523 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4524 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4525 } 4526 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4527 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4528 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4529 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4530 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4531 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4532 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4533 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4534 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4535 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4536 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4537 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4538 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4539 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4540 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4541 if (!pcbddc->symmetric_primal) { 4542 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4543 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4544 } 4545 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4546 } 4547 /* get back data */ 4548 *coarse_submat_vals_n = coarse_submat_vals; 4549 PetscFunctionReturn(0); 4550 } 4551 4552 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4553 { 4554 Mat *work_mat; 4555 IS isrow_s,iscol_s; 4556 PetscBool rsorted,csorted; 4557 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4558 PetscErrorCode ierr; 4559 4560 PetscFunctionBegin; 4561 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4562 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4563 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4564 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4565 4566 if (!rsorted) { 4567 const PetscInt *idxs; 4568 PetscInt *idxs_sorted,i; 4569 4570 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4571 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4572 for (i=0;i<rsize;i++) { 4573 idxs_perm_r[i] = i; 4574 } 4575 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4576 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4577 for (i=0;i<rsize;i++) { 4578 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4579 } 4580 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4581 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4582 } else { 4583 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4584 isrow_s = isrow; 4585 } 4586 4587 if (!csorted) { 4588 if (isrow == iscol) { 4589 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4590 iscol_s = isrow_s; 4591 } else { 4592 const PetscInt *idxs; 4593 PetscInt *idxs_sorted,i; 4594 4595 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4596 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4597 for (i=0;i<csize;i++) { 4598 idxs_perm_c[i] = i; 4599 } 4600 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4601 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4602 for (i=0;i<csize;i++) { 4603 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4604 } 4605 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4606 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4607 } 4608 } else { 4609 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4610 iscol_s = iscol; 4611 } 4612 4613 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4614 4615 if (!rsorted || !csorted) { 4616 Mat new_mat; 4617 IS is_perm_r,is_perm_c; 4618 4619 if (!rsorted) { 4620 PetscInt *idxs_r,i; 4621 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4622 for (i=0;i<rsize;i++) { 4623 idxs_r[idxs_perm_r[i]] = i; 4624 } 4625 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4626 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4627 } else { 4628 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4629 } 4630 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4631 4632 if (!csorted) { 4633 if (isrow_s == iscol_s) { 4634 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4635 is_perm_c = is_perm_r; 4636 } else { 4637 PetscInt *idxs_c,i; 4638 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4639 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4640 for (i=0;i<csize;i++) { 4641 idxs_c[idxs_perm_c[i]] = i; 4642 } 4643 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4644 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4645 } 4646 } else { 4647 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4648 } 4649 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4650 4651 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4652 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4653 work_mat[0] = new_mat; 4654 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4655 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4656 } 4657 4658 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4659 *B = work_mat[0]; 4660 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4661 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4662 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4663 PetscFunctionReturn(0); 4664 } 4665 4666 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4667 { 4668 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4669 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4670 Mat new_mat,lA; 4671 IS is_local,is_global; 4672 PetscInt local_size; 4673 PetscBool isseqaij; 4674 PetscErrorCode ierr; 4675 4676 PetscFunctionBegin; 4677 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4678 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4679 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4680 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4681 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4682 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4683 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4684 4685 /* check */ 4686 if (pcbddc->dbg_flag) { 4687 Vec x,x_change; 4688 PetscReal error; 4689 4690 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4691 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4692 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4693 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4694 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4695 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4696 if (!pcbddc->change_interior) { 4697 const PetscScalar *x,*y,*v; 4698 PetscReal lerror = 0.; 4699 PetscInt i; 4700 4701 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4702 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4703 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4704 for (i=0;i<local_size;i++) 4705 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4706 lerror = PetscAbsScalar(x[i]-y[i]); 4707 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4708 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4709 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4710 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4711 if (error > PETSC_SMALL) { 4712 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4713 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4714 } else { 4715 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4716 } 4717 } 4718 } 4719 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4720 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4721 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4722 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4723 if (error > PETSC_SMALL) { 4724 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4725 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4726 } else { 4727 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4728 } 4729 } 4730 ierr = VecDestroy(&x);CHKERRQ(ierr); 4731 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4732 } 4733 4734 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4735 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 4736 4737 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4738 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4739 if (isseqaij) { 4740 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4741 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4742 if (lA) { 4743 Mat work; 4744 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4745 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4746 ierr = MatDestroy(&work);CHKERRQ(ierr); 4747 } 4748 } else { 4749 Mat work_mat; 4750 4751 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4752 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4753 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4754 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4755 if (lA) { 4756 Mat work; 4757 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4758 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4759 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4760 ierr = MatDestroy(&work);CHKERRQ(ierr); 4761 } 4762 } 4763 if (matis->A->symmetric_set) { 4764 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4765 #if !defined(PETSC_USE_COMPLEX) 4766 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4767 #endif 4768 } 4769 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4770 PetscFunctionReturn(0); 4771 } 4772 4773 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4774 { 4775 PC_IS* pcis = (PC_IS*)(pc->data); 4776 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4777 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4778 PetscInt *idx_R_local=NULL; 4779 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4780 PetscInt vbs,bs; 4781 PetscBT bitmask=NULL; 4782 PetscErrorCode ierr; 4783 4784 PetscFunctionBegin; 4785 /* 4786 No need to setup local scatters if 4787 - primal space is unchanged 4788 AND 4789 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4790 AND 4791 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4792 */ 4793 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4794 PetscFunctionReturn(0); 4795 } 4796 /* destroy old objects */ 4797 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4798 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4799 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4800 /* Set Non-overlapping dimensions */ 4801 n_B = pcis->n_B; 4802 n_D = pcis->n - n_B; 4803 n_vertices = pcbddc->n_vertices; 4804 4805 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4806 4807 /* create auxiliary bitmask and allocate workspace */ 4808 if (!sub_schurs || !sub_schurs->reuse_solver) { 4809 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4810 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4811 for (i=0;i<n_vertices;i++) { 4812 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4813 } 4814 4815 for (i=0, n_R=0; i<pcis->n; i++) { 4816 if (!PetscBTLookup(bitmask,i)) { 4817 idx_R_local[n_R++] = i; 4818 } 4819 } 4820 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4821 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4822 4823 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4824 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4825 } 4826 4827 /* Block code */ 4828 vbs = 1; 4829 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4830 if (bs>1 && !(n_vertices%bs)) { 4831 PetscBool is_blocked = PETSC_TRUE; 4832 PetscInt *vary; 4833 if (!sub_schurs || !sub_schurs->reuse_solver) { 4834 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4835 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4836 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4837 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4838 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4839 for (i=0; i<pcis->n/bs; i++) { 4840 if (vary[i]!=0 && vary[i]!=bs) { 4841 is_blocked = PETSC_FALSE; 4842 break; 4843 } 4844 } 4845 ierr = PetscFree(vary);CHKERRQ(ierr); 4846 } else { 4847 /* Verify directly the R set */ 4848 for (i=0; i<n_R/bs; i++) { 4849 PetscInt j,node=idx_R_local[bs*i]; 4850 for (j=1; j<bs; j++) { 4851 if (node != idx_R_local[bs*i+j]-j) { 4852 is_blocked = PETSC_FALSE; 4853 break; 4854 } 4855 } 4856 } 4857 } 4858 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4859 vbs = bs; 4860 for (i=0;i<n_R/vbs;i++) { 4861 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4862 } 4863 } 4864 } 4865 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4866 if (sub_schurs && sub_schurs->reuse_solver) { 4867 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4868 4869 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4870 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4871 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4872 reuse_solver->is_R = pcbddc->is_R_local; 4873 } else { 4874 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4875 } 4876 4877 /* print some info if requested */ 4878 if (pcbddc->dbg_flag) { 4879 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4880 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4881 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4882 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4883 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4884 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4885 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4886 } 4887 4888 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4889 if (!sub_schurs || !sub_schurs->reuse_solver) { 4890 IS is_aux1,is_aux2; 4891 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4892 4893 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4894 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4895 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4896 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4897 for (i=0; i<n_D; i++) { 4898 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4899 } 4900 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4901 for (i=0, j=0; i<n_R; i++) { 4902 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4903 aux_array1[j++] = i; 4904 } 4905 } 4906 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4907 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4908 for (i=0, j=0; i<n_B; i++) { 4909 if (!PetscBTLookup(bitmask,is_indices[i])) { 4910 aux_array2[j++] = i; 4911 } 4912 } 4913 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4914 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4915 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4916 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4917 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4918 4919 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4920 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4921 for (i=0, j=0; i<n_R; i++) { 4922 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4923 aux_array1[j++] = i; 4924 } 4925 } 4926 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4927 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4928 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4929 } 4930 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4931 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4932 } else { 4933 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4934 IS tis; 4935 PetscInt schur_size; 4936 4937 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4938 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4939 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4940 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4941 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4942 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4943 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4944 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4945 } 4946 } 4947 PetscFunctionReturn(0); 4948 } 4949 4950 4951 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4952 { 4953 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4954 PC_IS *pcis = (PC_IS*)pc->data; 4955 PC pc_temp; 4956 Mat A_RR; 4957 MatReuse reuse; 4958 PetscScalar m_one = -1.0; 4959 PetscReal value; 4960 PetscInt n_D,n_R; 4961 PetscBool check_corr,issbaij; 4962 PetscErrorCode ierr; 4963 /* prefixes stuff */ 4964 char dir_prefix[256],neu_prefix[256],str_level[16]; 4965 size_t len; 4966 4967 PetscFunctionBegin; 4968 4969 /* compute prefixes */ 4970 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4971 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4972 if (!pcbddc->current_level) { 4973 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4974 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4975 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4976 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4977 } else { 4978 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 4979 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4980 len -= 15; /* remove "pc_bddc_coarse_" */ 4981 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4982 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4983 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4984 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4985 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4986 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4987 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4988 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4989 } 4990 4991 /* DIRICHLET PROBLEM */ 4992 if (dirichlet) { 4993 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4994 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4995 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4996 if (pcbddc->dbg_flag) { 4997 Mat A_IIn; 4998 4999 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 5000 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 5001 pcis->A_II = A_IIn; 5002 } 5003 } 5004 if (pcbddc->local_mat->symmetric_set) { 5005 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5006 } 5007 /* Matrix for Dirichlet problem is pcis->A_II */ 5008 n_D = pcis->n - pcis->n_B; 5009 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5010 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 5011 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 5012 /* default */ 5013 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 5014 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 5015 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5016 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5017 if (issbaij) { 5018 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5019 } else { 5020 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5021 } 5022 /* Allow user's customization */ 5023 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 5024 } 5025 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 5026 if (sub_schurs && sub_schurs->reuse_solver) { 5027 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5028 5029 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 5030 } 5031 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5032 if (!n_D) { 5033 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5034 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5035 } 5036 /* Set Up KSP for Dirichlet problem of BDDC */ 5037 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 5038 /* set ksp_D into pcis data */ 5039 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 5040 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 5041 pcis->ksp_D = pcbddc->ksp_D; 5042 } 5043 5044 /* NEUMANN PROBLEM */ 5045 A_RR = 0; 5046 if (neumann) { 5047 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5048 PetscInt ibs,mbs; 5049 PetscBool issbaij, reuse_neumann_solver; 5050 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5051 5052 reuse_neumann_solver = PETSC_FALSE; 5053 if (sub_schurs && sub_schurs->reuse_solver) { 5054 IS iP; 5055 5056 reuse_neumann_solver = PETSC_TRUE; 5057 ierr = PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 5058 if (iP) reuse_neumann_solver = PETSC_FALSE; 5059 } 5060 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5061 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 5062 if (pcbddc->ksp_R) { /* already created ksp */ 5063 PetscInt nn_R; 5064 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 5065 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5066 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 5067 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5068 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 5069 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5070 reuse = MAT_INITIAL_MATRIX; 5071 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5072 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5073 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5074 reuse = MAT_INITIAL_MATRIX; 5075 } else { /* safe to reuse the matrix */ 5076 reuse = MAT_REUSE_MATRIX; 5077 } 5078 } 5079 /* last check */ 5080 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5081 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5082 reuse = MAT_INITIAL_MATRIX; 5083 } 5084 } else { /* first time, so we need to create the matrix */ 5085 reuse = MAT_INITIAL_MATRIX; 5086 } 5087 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 5088 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 5089 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 5090 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5091 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5092 if (matis->A == pcbddc->local_mat) { 5093 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5094 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5095 } else { 5096 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5097 } 5098 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5099 if (matis->A == pcbddc->local_mat) { 5100 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5101 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5102 } else { 5103 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5104 } 5105 } 5106 /* extract A_RR */ 5107 if (reuse_neumann_solver) { 5108 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5109 5110 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5111 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5112 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5113 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 5114 } else { 5115 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 5116 } 5117 } else { 5118 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5119 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 5120 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5121 } 5122 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5123 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 5124 } 5125 if (pcbddc->local_mat->symmetric_set) { 5126 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5127 } 5128 if (!pcbddc->ksp_R) { /* create object if not present */ 5129 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 5130 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 5131 /* default */ 5132 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 5133 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 5134 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5135 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5136 if (issbaij) { 5137 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5138 } else { 5139 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5140 } 5141 /* Allow user's customization */ 5142 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 5143 } 5144 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5145 if (!n_R) { 5146 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5147 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5148 } 5149 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 5150 /* Reuse solver if it is present */ 5151 if (reuse_neumann_solver) { 5152 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5153 5154 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5155 } 5156 /* Set Up KSP for Neumann problem of BDDC */ 5157 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 5158 } 5159 5160 if (pcbddc->dbg_flag) { 5161 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5162 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5163 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5164 } 5165 5166 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5167 check_corr = PETSC_FALSE; 5168 if (pcbddc->NullSpace_corr[0]) { 5169 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5170 } 5171 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5172 check_corr = PETSC_TRUE; 5173 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5174 } 5175 if (neumann && pcbddc->NullSpace_corr[2]) { 5176 check_corr = PETSC_TRUE; 5177 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5178 } 5179 /* check Dirichlet and Neumann solvers */ 5180 if (pcbddc->dbg_flag) { 5181 if (dirichlet) { /* Dirichlet */ 5182 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5183 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5184 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5185 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5186 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5187 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5188 if (check_corr) { 5189 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 5190 } 5191 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5192 } 5193 if (neumann) { /* Neumann */ 5194 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5195 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5196 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5197 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5198 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5199 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5200 if (check_corr) { 5201 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 5202 } 5203 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5204 } 5205 } 5206 /* free Neumann problem's matrix */ 5207 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5208 PetscFunctionReturn(0); 5209 } 5210 5211 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5212 { 5213 PetscErrorCode ierr; 5214 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5215 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5216 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 5217 5218 PetscFunctionBegin; 5219 if (!reuse_solver) { 5220 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5221 } 5222 if (!pcbddc->switch_static) { 5223 if (applytranspose && pcbddc->local_auxmat1) { 5224 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5225 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5226 } 5227 if (!reuse_solver) { 5228 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5229 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5230 } else { 5231 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5232 5233 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5234 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5235 } 5236 } else { 5237 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5238 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5239 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5240 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5241 if (applytranspose && pcbddc->local_auxmat1) { 5242 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5243 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5244 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5245 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5246 } 5247 } 5248 if (!reuse_solver || pcbddc->switch_static) { 5249 if (applytranspose) { 5250 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5251 } else { 5252 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5253 } 5254 } else { 5255 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5256 5257 if (applytranspose) { 5258 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5259 } else { 5260 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5261 } 5262 } 5263 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5264 if (!pcbddc->switch_static) { 5265 if (!reuse_solver) { 5266 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5267 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5268 } else { 5269 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5270 5271 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5272 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5273 } 5274 if (!applytranspose && pcbddc->local_auxmat1) { 5275 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5276 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5277 } 5278 } else { 5279 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5280 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5281 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5282 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5283 if (!applytranspose && pcbddc->local_auxmat1) { 5284 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5285 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5286 } 5287 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5288 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5289 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5290 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5291 } 5292 PetscFunctionReturn(0); 5293 } 5294 5295 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5296 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5297 { 5298 PetscErrorCode ierr; 5299 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5300 PC_IS* pcis = (PC_IS*) (pc->data); 5301 const PetscScalar zero = 0.0; 5302 5303 PetscFunctionBegin; 5304 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5305 if (!pcbddc->benign_apply_coarse_only) { 5306 if (applytranspose) { 5307 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5308 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5309 } else { 5310 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5311 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5312 } 5313 } else { 5314 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5315 } 5316 5317 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5318 if (pcbddc->benign_n) { 5319 PetscScalar *array; 5320 PetscInt j; 5321 5322 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5323 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5324 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5325 } 5326 5327 /* start communications from local primal nodes to rhs of coarse solver */ 5328 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5329 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5330 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5331 5332 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5333 if (pcbddc->coarse_ksp) { 5334 Mat coarse_mat; 5335 Vec rhs,sol; 5336 MatNullSpace nullsp; 5337 PetscBool isbddc = PETSC_FALSE; 5338 5339 if (pcbddc->benign_have_null) { 5340 PC coarse_pc; 5341 5342 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5343 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5344 /* we need to propagate to coarser levels the need for a possible benign correction */ 5345 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5346 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5347 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5348 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5349 } 5350 } 5351 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5352 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5353 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5354 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5355 if (nullsp) { 5356 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5357 } 5358 if (applytranspose) { 5359 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5360 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5361 } else { 5362 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5363 PC coarse_pc; 5364 5365 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5366 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5367 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5368 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5369 } else { 5370 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5371 } 5372 } 5373 /* we don't need the benign correction at coarser levels anymore */ 5374 if (pcbddc->benign_have_null && isbddc) { 5375 PC coarse_pc; 5376 PC_BDDC* coarsepcbddc; 5377 5378 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5379 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5380 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5381 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5382 } 5383 if (nullsp) { 5384 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5385 } 5386 } 5387 5388 /* Local solution on R nodes */ 5389 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5390 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5391 } 5392 /* communications from coarse sol to local primal nodes */ 5393 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5394 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5395 5396 /* Sum contributions from the two levels */ 5397 if (!pcbddc->benign_apply_coarse_only) { 5398 if (applytranspose) { 5399 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5400 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5401 } else { 5402 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5403 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5404 } 5405 /* store p0 */ 5406 if (pcbddc->benign_n) { 5407 PetscScalar *array; 5408 PetscInt j; 5409 5410 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5411 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5412 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5413 } 5414 } else { /* expand the coarse solution */ 5415 if (applytranspose) { 5416 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5417 } else { 5418 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5419 } 5420 } 5421 PetscFunctionReturn(0); 5422 } 5423 5424 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5425 { 5426 PetscErrorCode ierr; 5427 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5428 PetscScalar *array; 5429 Vec from,to; 5430 5431 PetscFunctionBegin; 5432 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5433 from = pcbddc->coarse_vec; 5434 to = pcbddc->vec1_P; 5435 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5436 Vec tvec; 5437 5438 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5439 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5440 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5441 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5442 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5443 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5444 } 5445 } else { /* from local to global -> put data in coarse right hand side */ 5446 from = pcbddc->vec1_P; 5447 to = pcbddc->coarse_vec; 5448 } 5449 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5450 PetscFunctionReturn(0); 5451 } 5452 5453 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5454 { 5455 PetscErrorCode ierr; 5456 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5457 PetscScalar *array; 5458 Vec from,to; 5459 5460 PetscFunctionBegin; 5461 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5462 from = pcbddc->coarse_vec; 5463 to = pcbddc->vec1_P; 5464 } else { /* from local to global -> put data in coarse right hand side */ 5465 from = pcbddc->vec1_P; 5466 to = pcbddc->coarse_vec; 5467 } 5468 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5469 if (smode == SCATTER_FORWARD) { 5470 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5471 Vec tvec; 5472 5473 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5474 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5475 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5476 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5477 } 5478 } else { 5479 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5480 ierr = VecResetArray(from);CHKERRQ(ierr); 5481 } 5482 } 5483 PetscFunctionReturn(0); 5484 } 5485 5486 /* uncomment for testing purposes */ 5487 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5488 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5489 { 5490 PetscErrorCode ierr; 5491 PC_IS* pcis = (PC_IS*)(pc->data); 5492 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5493 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5494 /* one and zero */ 5495 PetscScalar one=1.0,zero=0.0; 5496 /* space to store constraints and their local indices */ 5497 PetscScalar *constraints_data; 5498 PetscInt *constraints_idxs,*constraints_idxs_B; 5499 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5500 PetscInt *constraints_n; 5501 /* iterators */ 5502 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5503 /* BLAS integers */ 5504 PetscBLASInt lwork,lierr; 5505 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5506 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5507 /* reuse */ 5508 PetscInt olocal_primal_size,olocal_primal_size_cc; 5509 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5510 /* change of basis */ 5511 PetscBool qr_needed; 5512 PetscBT change_basis,qr_needed_idx; 5513 /* auxiliary stuff */ 5514 PetscInt *nnz,*is_indices; 5515 PetscInt ncc; 5516 /* some quantities */ 5517 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5518 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5519 5520 PetscFunctionBegin; 5521 /* Destroy Mat objects computed previously */ 5522 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5523 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5524 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5525 /* save info on constraints from previous setup (if any) */ 5526 olocal_primal_size = pcbddc->local_primal_size; 5527 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5528 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5529 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5530 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5531 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5532 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5533 5534 if (!pcbddc->adaptive_selection) { 5535 IS ISForVertices,*ISForFaces,*ISForEdges; 5536 MatNullSpace nearnullsp; 5537 const Vec *nearnullvecs; 5538 Vec *localnearnullsp; 5539 PetscScalar *array; 5540 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5541 PetscBool nnsp_has_cnst; 5542 /* LAPACK working arrays for SVD or POD */ 5543 PetscBool skip_lapack,boolforchange; 5544 PetscScalar *work; 5545 PetscReal *singular_vals; 5546 #if defined(PETSC_USE_COMPLEX) 5547 PetscReal *rwork; 5548 #endif 5549 #if defined(PETSC_MISSING_LAPACK_GESVD) 5550 PetscScalar *temp_basis,*correlation_mat; 5551 #else 5552 PetscBLASInt dummy_int=1; 5553 PetscScalar dummy_scalar=1.; 5554 #endif 5555 5556 /* Get index sets for faces, edges and vertices from graph */ 5557 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5558 /* print some info */ 5559 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5560 PetscInt nv; 5561 5562 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5563 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5564 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5565 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5566 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5567 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5568 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5569 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5570 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5571 } 5572 5573 /* free unneeded index sets */ 5574 if (!pcbddc->use_vertices) { 5575 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5576 } 5577 if (!pcbddc->use_edges) { 5578 for (i=0;i<n_ISForEdges;i++) { 5579 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5580 } 5581 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5582 n_ISForEdges = 0; 5583 } 5584 if (!pcbddc->use_faces) { 5585 for (i=0;i<n_ISForFaces;i++) { 5586 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5587 } 5588 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5589 n_ISForFaces = 0; 5590 } 5591 5592 /* check if near null space is attached to global mat */ 5593 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5594 if (nearnullsp) { 5595 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5596 /* remove any stored info */ 5597 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5598 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5599 /* store information for BDDC solver reuse */ 5600 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5601 pcbddc->onearnullspace = nearnullsp; 5602 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5603 for (i=0;i<nnsp_size;i++) { 5604 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5605 } 5606 } else { /* if near null space is not provided BDDC uses constants by default */ 5607 nnsp_size = 0; 5608 nnsp_has_cnst = PETSC_TRUE; 5609 } 5610 /* get max number of constraints on a single cc */ 5611 max_constraints = nnsp_size; 5612 if (nnsp_has_cnst) max_constraints++; 5613 5614 /* 5615 Evaluate maximum storage size needed by the procedure 5616 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5617 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5618 There can be multiple constraints per connected component 5619 */ 5620 n_vertices = 0; 5621 if (ISForVertices) { 5622 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5623 } 5624 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5625 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5626 5627 total_counts = n_ISForFaces+n_ISForEdges; 5628 total_counts *= max_constraints; 5629 total_counts += n_vertices; 5630 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5631 5632 total_counts = 0; 5633 max_size_of_constraint = 0; 5634 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5635 IS used_is; 5636 if (i<n_ISForEdges) { 5637 used_is = ISForEdges[i]; 5638 } else { 5639 used_is = ISForFaces[i-n_ISForEdges]; 5640 } 5641 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5642 total_counts += j; 5643 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5644 } 5645 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5646 5647 /* get local part of global near null space vectors */ 5648 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5649 for (k=0;k<nnsp_size;k++) { 5650 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5651 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5652 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5653 } 5654 5655 /* whether or not to skip lapack calls */ 5656 skip_lapack = PETSC_TRUE; 5657 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5658 5659 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5660 if (!skip_lapack) { 5661 PetscScalar temp_work; 5662 5663 #if defined(PETSC_MISSING_LAPACK_GESVD) 5664 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5665 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5666 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5667 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5668 #if defined(PETSC_USE_COMPLEX) 5669 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5670 #endif 5671 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5672 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5673 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5674 lwork = -1; 5675 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5676 #if !defined(PETSC_USE_COMPLEX) 5677 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5678 #else 5679 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5680 #endif 5681 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5682 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5683 #else /* on missing GESVD */ 5684 /* SVD */ 5685 PetscInt max_n,min_n; 5686 max_n = max_size_of_constraint; 5687 min_n = max_constraints; 5688 if (max_size_of_constraint < max_constraints) { 5689 min_n = max_size_of_constraint; 5690 max_n = max_constraints; 5691 } 5692 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5693 #if defined(PETSC_USE_COMPLEX) 5694 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5695 #endif 5696 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5697 lwork = -1; 5698 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5699 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5700 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5701 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5702 #if !defined(PETSC_USE_COMPLEX) 5703 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5704 #else 5705 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5706 #endif 5707 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5708 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5709 #endif /* on missing GESVD */ 5710 /* Allocate optimal workspace */ 5711 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5712 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5713 } 5714 /* Now we can loop on constraining sets */ 5715 total_counts = 0; 5716 constraints_idxs_ptr[0] = 0; 5717 constraints_data_ptr[0] = 0; 5718 /* vertices */ 5719 if (n_vertices) { 5720 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5721 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5722 for (i=0;i<n_vertices;i++) { 5723 constraints_n[total_counts] = 1; 5724 constraints_data[total_counts] = 1.0; 5725 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5726 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5727 total_counts++; 5728 } 5729 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5730 n_vertices = total_counts; 5731 } 5732 5733 /* edges and faces */ 5734 total_counts_cc = total_counts; 5735 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5736 IS used_is; 5737 PetscBool idxs_copied = PETSC_FALSE; 5738 5739 if (ncc<n_ISForEdges) { 5740 used_is = ISForEdges[ncc]; 5741 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5742 } else { 5743 used_is = ISForFaces[ncc-n_ISForEdges]; 5744 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5745 } 5746 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5747 5748 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5749 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5750 /* change of basis should not be performed on local periodic nodes */ 5751 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5752 if (nnsp_has_cnst) { 5753 PetscScalar quad_value; 5754 5755 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5756 idxs_copied = PETSC_TRUE; 5757 5758 if (!pcbddc->use_nnsp_true) { 5759 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5760 } else { 5761 quad_value = 1.0; 5762 } 5763 for (j=0;j<size_of_constraint;j++) { 5764 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5765 } 5766 temp_constraints++; 5767 total_counts++; 5768 } 5769 for (k=0;k<nnsp_size;k++) { 5770 PetscReal real_value; 5771 PetscScalar *ptr_to_data; 5772 5773 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5774 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5775 for (j=0;j<size_of_constraint;j++) { 5776 ptr_to_data[j] = array[is_indices[j]]; 5777 } 5778 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5779 /* check if array is null on the connected component */ 5780 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5781 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5782 if (real_value > 0.0) { /* keep indices and values */ 5783 temp_constraints++; 5784 total_counts++; 5785 if (!idxs_copied) { 5786 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5787 idxs_copied = PETSC_TRUE; 5788 } 5789 } 5790 } 5791 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5792 valid_constraints = temp_constraints; 5793 if (!pcbddc->use_nnsp_true && temp_constraints) { 5794 if (temp_constraints == 1) { /* just normalize the constraint */ 5795 PetscScalar norm,*ptr_to_data; 5796 5797 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5798 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5799 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5800 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5801 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5802 } else { /* perform SVD */ 5803 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5804 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5805 5806 #if defined(PETSC_MISSING_LAPACK_GESVD) 5807 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5808 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5809 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5810 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5811 from that computed using LAPACKgesvd 5812 -> This is due to a different computation of eigenvectors in LAPACKheev 5813 -> The quality of the POD-computed basis will be the same */ 5814 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5815 /* Store upper triangular part of correlation matrix */ 5816 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5817 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5818 for (j=0;j<temp_constraints;j++) { 5819 for (k=0;k<j+1;k++) { 5820 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5821 } 5822 } 5823 /* compute eigenvalues and eigenvectors of correlation matrix */ 5824 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5825 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5826 #if !defined(PETSC_USE_COMPLEX) 5827 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5828 #else 5829 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5830 #endif 5831 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5832 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5833 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5834 j = 0; 5835 while (j < temp_constraints && singular_vals[j] < tol) j++; 5836 total_counts = total_counts-j; 5837 valid_constraints = temp_constraints-j; 5838 /* scale and copy POD basis into used quadrature memory */ 5839 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5840 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5841 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5842 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5843 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5844 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5845 if (j<temp_constraints) { 5846 PetscInt ii; 5847 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5848 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5849 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5850 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5851 for (k=0;k<temp_constraints-j;k++) { 5852 for (ii=0;ii<size_of_constraint;ii++) { 5853 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5854 } 5855 } 5856 } 5857 #else /* on missing GESVD */ 5858 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5859 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5860 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5861 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5862 #if !defined(PETSC_USE_COMPLEX) 5863 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5864 #else 5865 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5866 #endif 5867 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5868 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5869 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5870 k = temp_constraints; 5871 if (k > size_of_constraint) k = size_of_constraint; 5872 j = 0; 5873 while (j < k && singular_vals[k-j-1] < tol) j++; 5874 valid_constraints = k-j; 5875 total_counts = total_counts-temp_constraints+valid_constraints; 5876 #endif /* on missing GESVD */ 5877 } 5878 } 5879 /* update pointers information */ 5880 if (valid_constraints) { 5881 constraints_n[total_counts_cc] = valid_constraints; 5882 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5883 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5884 /* set change_of_basis flag */ 5885 if (boolforchange) { 5886 PetscBTSet(change_basis,total_counts_cc); 5887 } 5888 total_counts_cc++; 5889 } 5890 } 5891 /* free workspace */ 5892 if (!skip_lapack) { 5893 ierr = PetscFree(work);CHKERRQ(ierr); 5894 #if defined(PETSC_USE_COMPLEX) 5895 ierr = PetscFree(rwork);CHKERRQ(ierr); 5896 #endif 5897 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5898 #if defined(PETSC_MISSING_LAPACK_GESVD) 5899 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5900 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5901 #endif 5902 } 5903 for (k=0;k<nnsp_size;k++) { 5904 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5905 } 5906 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5907 /* free index sets of faces, edges and vertices */ 5908 for (i=0;i<n_ISForFaces;i++) { 5909 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5910 } 5911 if (n_ISForFaces) { 5912 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5913 } 5914 for (i=0;i<n_ISForEdges;i++) { 5915 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5916 } 5917 if (n_ISForEdges) { 5918 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5919 } 5920 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5921 } else { 5922 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5923 5924 total_counts = 0; 5925 n_vertices = 0; 5926 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5927 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5928 } 5929 max_constraints = 0; 5930 total_counts_cc = 0; 5931 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5932 total_counts += pcbddc->adaptive_constraints_n[i]; 5933 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5934 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5935 } 5936 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5937 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5938 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5939 constraints_data = pcbddc->adaptive_constraints_data; 5940 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5941 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5942 total_counts_cc = 0; 5943 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5944 if (pcbddc->adaptive_constraints_n[i]) { 5945 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5946 } 5947 } 5948 #if 0 5949 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5950 for (i=0;i<total_counts_cc;i++) { 5951 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5952 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5953 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5954 printf(" %d",constraints_idxs[j]); 5955 } 5956 printf("\n"); 5957 printf("number of cc: %d\n",constraints_n[i]); 5958 } 5959 for (i=0;i<n_vertices;i++) { 5960 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5961 } 5962 for (i=0;i<sub_schurs->n_subs;i++) { 5963 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5964 } 5965 #endif 5966 5967 max_size_of_constraint = 0; 5968 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5969 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5970 /* Change of basis */ 5971 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5972 if (pcbddc->use_change_of_basis) { 5973 for (i=0;i<sub_schurs->n_subs;i++) { 5974 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5975 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5976 } 5977 } 5978 } 5979 } 5980 pcbddc->local_primal_size = total_counts; 5981 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5982 5983 /* map constraints_idxs in boundary numbering */ 5984 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5985 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5986 5987 /* Create constraint matrix */ 5988 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5989 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5990 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5991 5992 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5993 /* determine if a QR strategy is needed for change of basis */ 5994 qr_needed = PETSC_FALSE; 5995 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5996 total_primal_vertices=0; 5997 pcbddc->local_primal_size_cc = 0; 5998 for (i=0;i<total_counts_cc;i++) { 5999 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6000 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6001 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6002 pcbddc->local_primal_size_cc += 1; 6003 } else if (PetscBTLookup(change_basis,i)) { 6004 for (k=0;k<constraints_n[i];k++) { 6005 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6006 } 6007 pcbddc->local_primal_size_cc += constraints_n[i]; 6008 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6009 PetscBTSet(qr_needed_idx,i); 6010 qr_needed = PETSC_TRUE; 6011 } 6012 } else { 6013 pcbddc->local_primal_size_cc += 1; 6014 } 6015 } 6016 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6017 pcbddc->n_vertices = total_primal_vertices; 6018 /* permute indices in order to have a sorted set of vertices */ 6019 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6020 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6021 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 6022 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6023 6024 /* nonzero structure of constraint matrix */ 6025 /* and get reference dof for local constraints */ 6026 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 6027 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6028 6029 j = total_primal_vertices; 6030 total_counts = total_primal_vertices; 6031 cum = total_primal_vertices; 6032 for (i=n_vertices;i<total_counts_cc;i++) { 6033 if (!PetscBTLookup(change_basis,i)) { 6034 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6035 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6036 cum++; 6037 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6038 for (k=0;k<constraints_n[i];k++) { 6039 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6040 nnz[j+k] = size_of_constraint; 6041 } 6042 j += constraints_n[i]; 6043 } 6044 } 6045 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 6046 ierr = PetscFree(nnz);CHKERRQ(ierr); 6047 6048 /* set values in constraint matrix */ 6049 for (i=0;i<total_primal_vertices;i++) { 6050 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6051 } 6052 total_counts = total_primal_vertices; 6053 for (i=n_vertices;i<total_counts_cc;i++) { 6054 if (!PetscBTLookup(change_basis,i)) { 6055 PetscInt *cols; 6056 6057 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6058 cols = constraints_idxs+constraints_idxs_ptr[i]; 6059 for (k=0;k<constraints_n[i];k++) { 6060 PetscInt row = total_counts+k; 6061 PetscScalar *vals; 6062 6063 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6064 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6065 } 6066 total_counts += constraints_n[i]; 6067 } 6068 } 6069 /* assembling */ 6070 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6071 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6072 6073 /* 6074 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6075 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 6076 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 6077 */ 6078 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6079 if (pcbddc->use_change_of_basis) { 6080 /* dual and primal dofs on a single cc */ 6081 PetscInt dual_dofs,primal_dofs; 6082 /* working stuff for GEQRF */ 6083 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 6084 PetscBLASInt lqr_work; 6085 /* working stuff for UNGQR */ 6086 PetscScalar *gqr_work,lgqr_work_t; 6087 PetscBLASInt lgqr_work; 6088 /* working stuff for TRTRS */ 6089 PetscScalar *trs_rhs; 6090 PetscBLASInt Blas_NRHS; 6091 /* pointers for values insertion into change of basis matrix */ 6092 PetscInt *start_rows,*start_cols; 6093 PetscScalar *start_vals; 6094 /* working stuff for values insertion */ 6095 PetscBT is_primal; 6096 PetscInt *aux_primal_numbering_B; 6097 /* matrix sizes */ 6098 PetscInt global_size,local_size; 6099 /* temporary change of basis */ 6100 Mat localChangeOfBasisMatrix; 6101 /* extra space for debugging */ 6102 PetscScalar *dbg_work; 6103 6104 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6105 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 6106 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6107 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 6108 /* nonzeros for local mat */ 6109 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 6110 if (!pcbddc->benign_change || pcbddc->fake_change) { 6111 for (i=0;i<pcis->n;i++) nnz[i]=1; 6112 } else { 6113 const PetscInt *ii; 6114 PetscInt n; 6115 PetscBool flg_row; 6116 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6117 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6118 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6119 } 6120 for (i=n_vertices;i<total_counts_cc;i++) { 6121 if (PetscBTLookup(change_basis,i)) { 6122 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6123 if (PetscBTLookup(qr_needed_idx,i)) { 6124 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6125 } else { 6126 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6127 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6128 } 6129 } 6130 } 6131 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 6132 ierr = PetscFree(nnz);CHKERRQ(ierr); 6133 /* Set interior change in the matrix */ 6134 if (!pcbddc->benign_change || pcbddc->fake_change) { 6135 for (i=0;i<pcis->n;i++) { 6136 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6137 } 6138 } else { 6139 const PetscInt *ii,*jj; 6140 PetscScalar *aa; 6141 PetscInt n; 6142 PetscBool flg_row; 6143 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6144 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6145 for (i=0;i<n;i++) { 6146 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 6147 } 6148 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6149 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6150 } 6151 6152 if (pcbddc->dbg_flag) { 6153 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6154 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6155 } 6156 6157 6158 /* Now we loop on the constraints which need a change of basis */ 6159 /* 6160 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6161 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6162 6163 Basic blocks of change of basis matrix T computed by 6164 6165 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6166 6167 | 1 0 ... 0 s_1/S | 6168 | 0 1 ... 0 s_2/S | 6169 | ... | 6170 | 0 ... 1 s_{n-1}/S | 6171 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6172 6173 with S = \sum_{i=1}^n s_i^2 6174 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6175 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6176 6177 - QR decomposition of constraints otherwise 6178 */ 6179 if (qr_needed) { 6180 /* space to store Q */ 6181 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6182 /* array to store scaling factors for reflectors */ 6183 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6184 /* first we issue queries for optimal work */ 6185 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6186 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6187 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6188 lqr_work = -1; 6189 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6190 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6191 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6192 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6193 lgqr_work = -1; 6194 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6195 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6196 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6197 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6198 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6199 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6200 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6201 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6202 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6203 /* array to store rhs and solution of triangular solver */ 6204 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6205 /* allocating workspace for check */ 6206 if (pcbddc->dbg_flag) { 6207 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6208 } 6209 } 6210 /* array to store whether a node is primal or not */ 6211 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6212 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6213 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6214 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 6215 for (i=0;i<total_primal_vertices;i++) { 6216 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6217 } 6218 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6219 6220 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6221 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6222 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6223 if (PetscBTLookup(change_basis,total_counts)) { 6224 /* get constraint info */ 6225 primal_dofs = constraints_n[total_counts]; 6226 dual_dofs = size_of_constraint-primal_dofs; 6227 6228 if (pcbddc->dbg_flag) { 6229 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6230 } 6231 6232 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6233 6234 /* copy quadrature constraints for change of basis check */ 6235 if (pcbddc->dbg_flag) { 6236 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6237 } 6238 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6239 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6240 6241 /* compute QR decomposition of constraints */ 6242 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6243 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6244 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6245 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6246 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6247 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6248 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6249 6250 /* explictly compute R^-T */ 6251 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 6252 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6253 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6254 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6255 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6256 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6257 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6258 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6259 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6260 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6261 6262 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 6263 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6264 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6265 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6266 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6267 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6268 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6269 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6270 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6271 6272 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6273 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6274 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6275 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6276 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6277 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6278 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6279 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6280 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6281 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6282 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6283 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6284 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6285 6286 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6287 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6288 /* insert cols for primal dofs */ 6289 for (j=0;j<primal_dofs;j++) { 6290 start_vals = &qr_basis[j*size_of_constraint]; 6291 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6292 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6293 } 6294 /* insert cols for dual dofs */ 6295 for (j=0,k=0;j<dual_dofs;k++) { 6296 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6297 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6298 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6299 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6300 j++; 6301 } 6302 } 6303 6304 /* check change of basis */ 6305 if (pcbddc->dbg_flag) { 6306 PetscInt ii,jj; 6307 PetscBool valid_qr=PETSC_TRUE; 6308 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6309 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6310 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6311 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6312 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6313 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6314 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6315 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6316 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6317 for (jj=0;jj<size_of_constraint;jj++) { 6318 for (ii=0;ii<primal_dofs;ii++) { 6319 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6320 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6321 } 6322 } 6323 if (!valid_qr) { 6324 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6325 for (jj=0;jj<size_of_constraint;jj++) { 6326 for (ii=0;ii<primal_dofs;ii++) { 6327 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6328 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6329 } 6330 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6331 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6332 } 6333 } 6334 } 6335 } else { 6336 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6337 } 6338 } 6339 } else { /* simple transformation block */ 6340 PetscInt row,col; 6341 PetscScalar val,norm; 6342 6343 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6344 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6345 for (j=0;j<size_of_constraint;j++) { 6346 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6347 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6348 if (!PetscBTLookup(is_primal,row_B)) { 6349 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6350 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6351 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6352 } else { 6353 for (k=0;k<size_of_constraint;k++) { 6354 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6355 if (row != col) { 6356 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6357 } else { 6358 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6359 } 6360 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6361 } 6362 } 6363 } 6364 if (pcbddc->dbg_flag) { 6365 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6366 } 6367 } 6368 } else { 6369 if (pcbddc->dbg_flag) { 6370 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6371 } 6372 } 6373 } 6374 6375 /* free workspace */ 6376 if (qr_needed) { 6377 if (pcbddc->dbg_flag) { 6378 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6379 } 6380 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6381 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6382 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6383 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6384 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6385 } 6386 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6387 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6388 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6389 6390 /* assembling of global change of variable */ 6391 if (!pcbddc->fake_change) { 6392 Mat tmat; 6393 PetscInt bs; 6394 6395 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6396 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6397 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6398 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6399 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6400 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6401 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6402 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6403 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6404 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6405 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6406 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6407 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6408 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6409 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6410 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6411 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6412 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6413 6414 /* check */ 6415 if (pcbddc->dbg_flag) { 6416 PetscReal error; 6417 Vec x,x_change; 6418 6419 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6420 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6421 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6422 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6423 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6424 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6425 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6426 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6427 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6428 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6429 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6430 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6431 if (error > PETSC_SMALL) { 6432 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6433 } 6434 ierr = VecDestroy(&x);CHKERRQ(ierr); 6435 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6436 } 6437 /* adapt sub_schurs computed (if any) */ 6438 if (pcbddc->use_deluxe_scaling) { 6439 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6440 6441 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6442 if (sub_schurs && sub_schurs->S_Ej_all) { 6443 Mat S_new,tmat; 6444 IS is_all_N,is_V_Sall = NULL; 6445 6446 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6447 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6448 if (pcbddc->deluxe_zerorows) { 6449 ISLocalToGlobalMapping NtoSall; 6450 IS is_V; 6451 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6452 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6453 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6454 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6455 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6456 } 6457 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6458 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6459 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6460 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6461 if (pcbddc->deluxe_zerorows) { 6462 const PetscScalar *array; 6463 const PetscInt *idxs_V,*idxs_all; 6464 PetscInt i,n_V; 6465 6466 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6467 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6468 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6469 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6470 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6471 for (i=0;i<n_V;i++) { 6472 PetscScalar val; 6473 PetscInt idx; 6474 6475 idx = idxs_V[i]; 6476 val = array[idxs_all[idxs_V[i]]]; 6477 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6478 } 6479 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6480 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6481 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6482 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6483 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6484 } 6485 sub_schurs->S_Ej_all = S_new; 6486 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6487 if (sub_schurs->sum_S_Ej_all) { 6488 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6489 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6490 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6491 if (pcbddc->deluxe_zerorows) { 6492 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6493 } 6494 sub_schurs->sum_S_Ej_all = S_new; 6495 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6496 } 6497 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6498 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6499 } 6500 /* destroy any change of basis context in sub_schurs */ 6501 if (sub_schurs && sub_schurs->change) { 6502 PetscInt i; 6503 6504 for (i=0;i<sub_schurs->n_subs;i++) { 6505 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6506 } 6507 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6508 } 6509 } 6510 if (pcbddc->switch_static) { /* need to save the local change */ 6511 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6512 } else { 6513 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6514 } 6515 /* determine if any process has changed the pressures locally */ 6516 pcbddc->change_interior = pcbddc->benign_have_null; 6517 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6518 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6519 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6520 pcbddc->use_qr_single = qr_needed; 6521 } 6522 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6523 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6524 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6525 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6526 } else { 6527 Mat benign_global = NULL; 6528 if (pcbddc->benign_have_null) { 6529 Mat tmat; 6530 6531 pcbddc->change_interior = PETSC_TRUE; 6532 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6533 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6534 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6535 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6536 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6537 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6538 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6539 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6540 if (pcbddc->benign_change) { 6541 Mat M; 6542 6543 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6544 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6545 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6546 ierr = MatDestroy(&M);CHKERRQ(ierr); 6547 } else { 6548 Mat eye; 6549 PetscScalar *array; 6550 6551 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6552 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6553 for (i=0;i<pcis->n;i++) { 6554 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6555 } 6556 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6557 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6558 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6559 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6560 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6561 } 6562 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6563 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6564 } 6565 if (pcbddc->user_ChangeOfBasisMatrix) { 6566 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6567 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6568 } else if (pcbddc->benign_have_null) { 6569 pcbddc->ChangeOfBasisMatrix = benign_global; 6570 } 6571 } 6572 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6573 IS is_global; 6574 const PetscInt *gidxs; 6575 6576 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6577 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6578 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6579 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6580 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6581 } 6582 } 6583 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6584 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6585 } 6586 6587 if (!pcbddc->fake_change) { 6588 /* add pressure dofs to set of primal nodes for numbering purposes */ 6589 for (i=0;i<pcbddc->benign_n;i++) { 6590 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6591 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6592 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6593 pcbddc->local_primal_size_cc++; 6594 pcbddc->local_primal_size++; 6595 } 6596 6597 /* check if a new primal space has been introduced (also take into account benign trick) */ 6598 pcbddc->new_primal_space_local = PETSC_TRUE; 6599 if (olocal_primal_size == pcbddc->local_primal_size) { 6600 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6601 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6602 if (!pcbddc->new_primal_space_local) { 6603 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6604 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6605 } 6606 } 6607 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6608 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6609 } 6610 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6611 6612 /* flush dbg viewer */ 6613 if (pcbddc->dbg_flag) { 6614 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6615 } 6616 6617 /* free workspace */ 6618 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6619 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6620 if (!pcbddc->adaptive_selection) { 6621 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6622 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6623 } else { 6624 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6625 pcbddc->adaptive_constraints_idxs_ptr, 6626 pcbddc->adaptive_constraints_data_ptr, 6627 pcbddc->adaptive_constraints_idxs, 6628 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6629 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6630 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6631 } 6632 PetscFunctionReturn(0); 6633 } 6634 6635 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6636 { 6637 ISLocalToGlobalMapping map; 6638 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6639 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6640 PetscInt i,N; 6641 PetscBool rcsr = PETSC_FALSE; 6642 PetscErrorCode ierr; 6643 6644 PetscFunctionBegin; 6645 if (pcbddc->recompute_topography) { 6646 pcbddc->graphanalyzed = PETSC_FALSE; 6647 /* Reset previously computed graph */ 6648 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6649 /* Init local Graph struct */ 6650 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6651 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6652 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6653 6654 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6655 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6656 } 6657 /* Check validity of the csr graph passed in by the user */ 6658 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6659 6660 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6661 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6662 PetscInt *xadj,*adjncy; 6663 PetscInt nvtxs; 6664 PetscBool flg_row=PETSC_FALSE; 6665 6666 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6667 if (flg_row) { 6668 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6669 pcbddc->computed_rowadj = PETSC_TRUE; 6670 } 6671 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6672 rcsr = PETSC_TRUE; 6673 } 6674 if (pcbddc->dbg_flag) { 6675 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6676 } 6677 6678 /* Setup of Graph */ 6679 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6680 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6681 6682 /* attach info on disconnected subdomains if present */ 6683 if (pcbddc->n_local_subs) { 6684 PetscInt *local_subs; 6685 6686 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6687 for (i=0;i<pcbddc->n_local_subs;i++) { 6688 const PetscInt *idxs; 6689 PetscInt nl,j; 6690 6691 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6692 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6693 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6694 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6695 } 6696 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6697 pcbddc->mat_graph->local_subs = local_subs; 6698 } 6699 } 6700 6701 if (!pcbddc->graphanalyzed) { 6702 /* Graph's connected components analysis */ 6703 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6704 pcbddc->graphanalyzed = PETSC_TRUE; 6705 } 6706 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6707 PetscFunctionReturn(0); 6708 } 6709 6710 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6711 { 6712 PetscInt i,j; 6713 PetscScalar *alphas; 6714 PetscErrorCode ierr; 6715 6716 PetscFunctionBegin; 6717 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6718 for (i=0;i<n;i++) { 6719 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6720 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6721 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6722 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6723 } 6724 ierr = PetscFree(alphas);CHKERRQ(ierr); 6725 PetscFunctionReturn(0); 6726 } 6727 6728 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6729 { 6730 Mat A; 6731 PetscInt n_neighs,*neighs,*n_shared,**shared; 6732 PetscMPIInt size,rank,color; 6733 PetscInt *xadj,*adjncy; 6734 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6735 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 6736 PetscInt void_procs,*procs_candidates = NULL; 6737 PetscInt xadj_count,*count; 6738 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6739 PetscSubcomm psubcomm; 6740 MPI_Comm subcomm; 6741 PetscErrorCode ierr; 6742 6743 PetscFunctionBegin; 6744 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6745 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6746 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 6747 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6748 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6749 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6750 6751 if (have_void) *have_void = PETSC_FALSE; 6752 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6753 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6754 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6755 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6756 im_active = !!n; 6757 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6758 void_procs = size - active_procs; 6759 /* get ranks of of non-active processes in mat communicator */ 6760 if (void_procs) { 6761 PetscInt ncand; 6762 6763 if (have_void) *have_void = PETSC_TRUE; 6764 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6765 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6766 for (i=0,ncand=0;i<size;i++) { 6767 if (!procs_candidates[i]) { 6768 procs_candidates[ncand++] = i; 6769 } 6770 } 6771 /* force n_subdomains to be not greater that the number of non-active processes */ 6772 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6773 } 6774 6775 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 6776 number of subdomains requested 1 -> send to master or first candidate in voids */ 6777 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 6778 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 6779 PetscInt issize,isidx,dest; 6780 if (*n_subdomains == 1) dest = 0; 6781 else dest = rank; 6782 if (im_active) { 6783 issize = 1; 6784 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6785 isidx = procs_candidates[dest]; 6786 } else { 6787 isidx = dest; 6788 } 6789 } else { 6790 issize = 0; 6791 isidx = -1; 6792 } 6793 if (*n_subdomains != 1) *n_subdomains = active_procs; 6794 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6795 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6796 PetscFunctionReturn(0); 6797 } 6798 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6799 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6800 threshold = PetscMax(threshold,2); 6801 6802 /* Get info on mapping */ 6803 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6804 6805 /* build local CSR graph of subdomains' connectivity */ 6806 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6807 xadj[0] = 0; 6808 xadj[1] = PetscMax(n_neighs-1,0); 6809 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6810 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6811 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 6812 for (i=1;i<n_neighs;i++) 6813 for (j=0;j<n_shared[i];j++) 6814 count[shared[i][j]] += 1; 6815 6816 xadj_count = 0; 6817 for (i=1;i<n_neighs;i++) { 6818 for (j=0;j<n_shared[i];j++) { 6819 if (count[shared[i][j]] < threshold) { 6820 adjncy[xadj_count] = neighs[i]; 6821 adjncy_wgt[xadj_count] = n_shared[i]; 6822 xadj_count++; 6823 break; 6824 } 6825 } 6826 } 6827 xadj[1] = xadj_count; 6828 ierr = PetscFree(count);CHKERRQ(ierr); 6829 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6830 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6831 6832 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6833 6834 /* Restrict work on active processes only */ 6835 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6836 if (void_procs) { 6837 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6838 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6839 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6840 subcomm = PetscSubcommChild(psubcomm); 6841 } else { 6842 psubcomm = NULL; 6843 subcomm = PetscObjectComm((PetscObject)mat); 6844 } 6845 6846 v_wgt = NULL; 6847 if (!color) { 6848 ierr = PetscFree(xadj);CHKERRQ(ierr); 6849 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6850 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6851 } else { 6852 Mat subdomain_adj; 6853 IS new_ranks,new_ranks_contig; 6854 MatPartitioning partitioner; 6855 PetscInt rstart=0,rend=0; 6856 PetscInt *is_indices,*oldranks; 6857 PetscMPIInt size; 6858 PetscBool aggregate; 6859 6860 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6861 if (void_procs) { 6862 PetscInt prank = rank; 6863 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6864 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6865 for (i=0;i<xadj[1];i++) { 6866 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6867 } 6868 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6869 } else { 6870 oldranks = NULL; 6871 } 6872 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6873 if (aggregate) { /* TODO: all this part could be made more efficient */ 6874 PetscInt lrows,row,ncols,*cols; 6875 PetscMPIInt nrank; 6876 PetscScalar *vals; 6877 6878 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6879 lrows = 0; 6880 if (nrank<redprocs) { 6881 lrows = size/redprocs; 6882 if (nrank<size%redprocs) lrows++; 6883 } 6884 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6885 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6886 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6887 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6888 row = nrank; 6889 ncols = xadj[1]-xadj[0]; 6890 cols = adjncy; 6891 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6892 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6893 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6894 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6895 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6896 ierr = PetscFree(xadj);CHKERRQ(ierr); 6897 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6898 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6899 ierr = PetscFree(vals);CHKERRQ(ierr); 6900 if (use_vwgt) { 6901 Vec v; 6902 const PetscScalar *array; 6903 PetscInt nl; 6904 6905 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6906 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 6907 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6908 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6909 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6910 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6911 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6912 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6913 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6914 ierr = VecDestroy(&v);CHKERRQ(ierr); 6915 } 6916 } else { 6917 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6918 if (use_vwgt) { 6919 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6920 v_wgt[0] = n; 6921 } 6922 } 6923 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6924 6925 /* Partition */ 6926 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6927 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6928 if (v_wgt) { 6929 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6930 } 6931 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6932 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6933 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6934 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6935 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6936 6937 /* renumber new_ranks to avoid "holes" in new set of processors */ 6938 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6939 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6940 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6941 if (!aggregate) { 6942 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6943 #if defined(PETSC_USE_DEBUG) 6944 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6945 #endif 6946 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6947 } else if (oldranks) { 6948 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6949 } else { 6950 ranks_send_to_idx[0] = is_indices[0]; 6951 } 6952 } else { 6953 PetscInt idx = 0; 6954 PetscMPIInt tag; 6955 MPI_Request *reqs; 6956 6957 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6958 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6959 for (i=rstart;i<rend;i++) { 6960 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6961 } 6962 ierr = MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6963 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6964 ierr = PetscFree(reqs);CHKERRQ(ierr); 6965 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6966 #if defined(PETSC_USE_DEBUG) 6967 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6968 #endif 6969 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 6970 } else if (oldranks) { 6971 ranks_send_to_idx[0] = oldranks[idx]; 6972 } else { 6973 ranks_send_to_idx[0] = idx; 6974 } 6975 } 6976 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6977 /* clean up */ 6978 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6979 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6980 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6981 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6982 } 6983 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6984 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6985 6986 /* assemble parallel IS for sends */ 6987 i = 1; 6988 if (!color) i=0; 6989 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6990 PetscFunctionReturn(0); 6991 } 6992 6993 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6994 6995 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6996 { 6997 Mat local_mat; 6998 IS is_sends_internal; 6999 PetscInt rows,cols,new_local_rows; 7000 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7001 PetscBool ismatis,isdense,newisdense,destroy_mat; 7002 ISLocalToGlobalMapping l2gmap; 7003 PetscInt* l2gmap_indices; 7004 const PetscInt* is_indices; 7005 MatType new_local_type; 7006 /* buffers */ 7007 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7008 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7009 PetscInt *recv_buffer_idxs_local; 7010 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 7011 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7012 /* MPI */ 7013 MPI_Comm comm,comm_n; 7014 PetscSubcomm subcomm; 7015 PetscMPIInt n_sends,n_recvs,commsize; 7016 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7017 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7018 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7019 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7020 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7021 PetscErrorCode ierr; 7022 7023 PetscFunctionBegin; 7024 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7025 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7026 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7027 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7028 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7029 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7030 PetscValidLogicalCollectiveBool(mat,reuse,6); 7031 PetscValidLogicalCollectiveInt(mat,nis,8); 7032 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7033 if (nvecs) { 7034 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7035 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7036 } 7037 /* further checks */ 7038 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7039 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 7040 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7041 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 7042 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7043 if (reuse && *mat_n) { 7044 PetscInt mrows,mcols,mnrows,mncols; 7045 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7046 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 7047 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7048 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 7049 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 7050 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7051 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7052 } 7053 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 7054 PetscValidLogicalCollectiveInt(mat,bs,0); 7055 7056 /* prepare IS for sending if not provided */ 7057 if (!is_sends) { 7058 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7059 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 7060 } else { 7061 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 7062 is_sends_internal = is_sends; 7063 } 7064 7065 /* get comm */ 7066 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 7067 7068 /* compute number of sends */ 7069 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 7070 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 7071 7072 /* compute number of receives */ 7073 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 7074 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 7075 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 7076 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7077 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7078 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 7079 ierr = PetscFree(iflags);CHKERRQ(ierr); 7080 7081 /* restrict comm if requested */ 7082 subcomm = 0; 7083 destroy_mat = PETSC_FALSE; 7084 if (restrict_comm) { 7085 PetscMPIInt color,subcommsize; 7086 7087 color = 0; 7088 if (restrict_full) { 7089 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7090 } else { 7091 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7092 } 7093 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 7094 subcommsize = commsize - subcommsize; 7095 /* check if reuse has been requested */ 7096 if (reuse) { 7097 if (*mat_n) { 7098 PetscMPIInt subcommsize2; 7099 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 7100 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7101 comm_n = PetscObjectComm((PetscObject)*mat_n); 7102 } else { 7103 comm_n = PETSC_COMM_SELF; 7104 } 7105 } else { /* MAT_INITIAL_MATRIX */ 7106 PetscMPIInt rank; 7107 7108 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 7109 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 7110 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 7111 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 7112 comm_n = PetscSubcommChild(subcomm); 7113 } 7114 /* flag to destroy *mat_n if not significative */ 7115 if (color) destroy_mat = PETSC_TRUE; 7116 } else { 7117 comm_n = comm; 7118 } 7119 7120 /* prepare send/receive buffers */ 7121 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 7122 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 7123 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 7124 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 7125 if (nis) { 7126 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 7127 } 7128 7129 /* Get data from local matrices */ 7130 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7131 /* TODO: See below some guidelines on how to prepare the local buffers */ 7132 /* 7133 send_buffer_vals should contain the raw values of the local matrix 7134 send_buffer_idxs should contain: 7135 - MatType_PRIVATE type 7136 - PetscInt size_of_l2gmap 7137 - PetscInt global_row_indices[size_of_l2gmap] 7138 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7139 */ 7140 else { 7141 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7142 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 7143 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 7144 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7145 send_buffer_idxs[1] = i; 7146 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7147 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 7148 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7149 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7150 for (i=0;i<n_sends;i++) { 7151 ilengths_vals[is_indices[i]] = len*len; 7152 ilengths_idxs[is_indices[i]] = len+2; 7153 } 7154 } 7155 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7156 /* additional is (if any) */ 7157 if (nis) { 7158 PetscMPIInt psum; 7159 PetscInt j; 7160 for (j=0,psum=0;j<nis;j++) { 7161 PetscInt plen; 7162 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7163 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7164 psum += len+1; /* indices + lenght */ 7165 } 7166 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7167 for (j=0,psum=0;j<nis;j++) { 7168 PetscInt plen; 7169 const PetscInt *is_array_idxs; 7170 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7171 send_buffer_idxs_is[psum] = plen; 7172 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7173 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 7174 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7175 psum += plen+1; /* indices + lenght */ 7176 } 7177 for (i=0;i<n_sends;i++) { 7178 ilengths_idxs_is[is_indices[i]] = psum; 7179 } 7180 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7181 } 7182 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7183 7184 buf_size_idxs = 0; 7185 buf_size_vals = 0; 7186 buf_size_idxs_is = 0; 7187 buf_size_vecs = 0; 7188 for (i=0;i<n_recvs;i++) { 7189 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7190 buf_size_vals += (PetscInt)olengths_vals[i]; 7191 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7192 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7193 } 7194 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7195 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7196 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7197 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7198 7199 /* get new tags for clean communications */ 7200 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7201 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7202 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7203 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7204 7205 /* allocate for requests */ 7206 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7207 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7208 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7209 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7210 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7211 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7212 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7213 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7214 7215 /* communications */ 7216 ptr_idxs = recv_buffer_idxs; 7217 ptr_vals = recv_buffer_vals; 7218 ptr_idxs_is = recv_buffer_idxs_is; 7219 ptr_vecs = recv_buffer_vecs; 7220 for (i=0;i<n_recvs;i++) { 7221 source_dest = onodes[i]; 7222 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 7223 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 7224 ptr_idxs += olengths_idxs[i]; 7225 ptr_vals += olengths_vals[i]; 7226 if (nis) { 7227 source_dest = onodes_is[i]; 7228 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 7229 ptr_idxs_is += olengths_idxs_is[i]; 7230 } 7231 if (nvecs) { 7232 source_dest = onodes[i]; 7233 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 7234 ptr_vecs += olengths_idxs[i]-2; 7235 } 7236 } 7237 for (i=0;i<n_sends;i++) { 7238 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7239 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 7240 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 7241 if (nis) { 7242 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 7243 } 7244 if (nvecs) { 7245 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7246 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 7247 } 7248 } 7249 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7250 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7251 7252 /* assemble new l2g map */ 7253 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7254 ptr_idxs = recv_buffer_idxs; 7255 new_local_rows = 0; 7256 for (i=0;i<n_recvs;i++) { 7257 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7258 ptr_idxs += olengths_idxs[i]; 7259 } 7260 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7261 ptr_idxs = recv_buffer_idxs; 7262 new_local_rows = 0; 7263 for (i=0;i<n_recvs;i++) { 7264 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 7265 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7266 ptr_idxs += olengths_idxs[i]; 7267 } 7268 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7269 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7270 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7271 7272 /* infer new local matrix type from received local matrices type */ 7273 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7274 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7275 if (n_recvs) { 7276 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7277 ptr_idxs = recv_buffer_idxs; 7278 for (i=0;i<n_recvs;i++) { 7279 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7280 new_local_type_private = MATAIJ_PRIVATE; 7281 break; 7282 } 7283 ptr_idxs += olengths_idxs[i]; 7284 } 7285 switch (new_local_type_private) { 7286 case MATDENSE_PRIVATE: 7287 new_local_type = MATSEQAIJ; 7288 bs = 1; 7289 break; 7290 case MATAIJ_PRIVATE: 7291 new_local_type = MATSEQAIJ; 7292 bs = 1; 7293 break; 7294 case MATBAIJ_PRIVATE: 7295 new_local_type = MATSEQBAIJ; 7296 break; 7297 case MATSBAIJ_PRIVATE: 7298 new_local_type = MATSEQSBAIJ; 7299 break; 7300 default: 7301 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7302 break; 7303 } 7304 } else { /* by default, new_local_type is seqaij */ 7305 new_local_type = MATSEQAIJ; 7306 bs = 1; 7307 } 7308 7309 /* create MATIS object if needed */ 7310 if (!reuse) { 7311 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7312 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7313 } else { 7314 /* it also destroys the local matrices */ 7315 if (*mat_n) { 7316 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7317 } else { /* this is a fake object */ 7318 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7319 } 7320 } 7321 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7322 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7323 7324 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7325 7326 /* Global to local map of received indices */ 7327 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7328 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7329 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7330 7331 /* restore attributes -> type of incoming data and its size */ 7332 buf_size_idxs = 0; 7333 for (i=0;i<n_recvs;i++) { 7334 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7335 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7336 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7337 } 7338 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7339 7340 /* set preallocation */ 7341 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7342 if (!newisdense) { 7343 PetscInt *new_local_nnz=0; 7344 7345 ptr_idxs = recv_buffer_idxs_local; 7346 if (n_recvs) { 7347 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7348 } 7349 for (i=0;i<n_recvs;i++) { 7350 PetscInt j; 7351 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7352 for (j=0;j<*(ptr_idxs+1);j++) { 7353 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7354 } 7355 } else { 7356 /* TODO */ 7357 } 7358 ptr_idxs += olengths_idxs[i]; 7359 } 7360 if (new_local_nnz) { 7361 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7362 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7363 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7364 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7365 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7366 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7367 } else { 7368 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7369 } 7370 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7371 } else { 7372 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7373 } 7374 7375 /* set values */ 7376 ptr_vals = recv_buffer_vals; 7377 ptr_idxs = recv_buffer_idxs_local; 7378 for (i=0;i<n_recvs;i++) { 7379 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7380 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7381 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7382 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7383 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7384 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7385 } else { 7386 /* TODO */ 7387 } 7388 ptr_idxs += olengths_idxs[i]; 7389 ptr_vals += olengths_vals[i]; 7390 } 7391 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7392 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7393 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7394 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7395 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7396 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7397 7398 #if 0 7399 if (!restrict_comm) { /* check */ 7400 Vec lvec,rvec; 7401 PetscReal infty_error; 7402 7403 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7404 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7405 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7406 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7407 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7408 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7409 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7410 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7411 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7412 } 7413 #endif 7414 7415 /* assemble new additional is (if any) */ 7416 if (nis) { 7417 PetscInt **temp_idxs,*count_is,j,psum; 7418 7419 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7420 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7421 ptr_idxs = recv_buffer_idxs_is; 7422 psum = 0; 7423 for (i=0;i<n_recvs;i++) { 7424 for (j=0;j<nis;j++) { 7425 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7426 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7427 psum += plen; 7428 ptr_idxs += plen+1; /* shift pointer to received data */ 7429 } 7430 } 7431 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7432 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7433 for (i=1;i<nis;i++) { 7434 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7435 } 7436 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7437 ptr_idxs = recv_buffer_idxs_is; 7438 for (i=0;i<n_recvs;i++) { 7439 for (j=0;j<nis;j++) { 7440 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7441 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7442 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7443 ptr_idxs += plen+1; /* shift pointer to received data */ 7444 } 7445 } 7446 for (i=0;i<nis;i++) { 7447 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7448 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7449 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7450 } 7451 ierr = PetscFree(count_is);CHKERRQ(ierr); 7452 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7453 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7454 } 7455 /* free workspace */ 7456 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7457 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7458 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7459 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7460 if (isdense) { 7461 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7462 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7463 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7464 } else { 7465 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7466 } 7467 if (nis) { 7468 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7469 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7470 } 7471 7472 if (nvecs) { 7473 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7474 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7475 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7476 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7477 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7478 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7479 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7480 /* set values */ 7481 ptr_vals = recv_buffer_vecs; 7482 ptr_idxs = recv_buffer_idxs_local; 7483 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7484 for (i=0;i<n_recvs;i++) { 7485 PetscInt j; 7486 for (j=0;j<*(ptr_idxs+1);j++) { 7487 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7488 } 7489 ptr_idxs += olengths_idxs[i]; 7490 ptr_vals += olengths_idxs[i]-2; 7491 } 7492 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7493 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7494 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7495 } 7496 7497 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7498 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7499 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7500 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7501 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7502 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7503 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7504 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7505 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7506 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7507 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7508 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7509 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7510 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7511 ierr = PetscFree(onodes);CHKERRQ(ierr); 7512 if (nis) { 7513 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7514 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7515 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7516 } 7517 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7518 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7519 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7520 for (i=0;i<nis;i++) { 7521 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7522 } 7523 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7524 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7525 } 7526 *mat_n = NULL; 7527 } 7528 PetscFunctionReturn(0); 7529 } 7530 7531 /* temporary hack into ksp private data structure */ 7532 #include <petsc/private/kspimpl.h> 7533 7534 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7535 { 7536 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7537 PC_IS *pcis = (PC_IS*)pc->data; 7538 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7539 Mat coarsedivudotp = NULL; 7540 Mat coarseG,t_coarse_mat_is; 7541 MatNullSpace CoarseNullSpace = NULL; 7542 ISLocalToGlobalMapping coarse_islg; 7543 IS coarse_is,*isarray; 7544 PetscInt i,im_active=-1,active_procs=-1; 7545 PetscInt nis,nisdofs,nisneu,nisvert; 7546 PC pc_temp; 7547 PCType coarse_pc_type; 7548 KSPType coarse_ksp_type; 7549 PetscBool multilevel_requested,multilevel_allowed; 7550 PetscBool coarse_reuse; 7551 PetscInt ncoarse,nedcfield; 7552 PetscBool compute_vecs = PETSC_FALSE; 7553 PetscScalar *array; 7554 MatReuse coarse_mat_reuse; 7555 PetscBool restr, full_restr, have_void; 7556 PetscMPIInt commsize; 7557 PetscErrorCode ierr; 7558 7559 PetscFunctionBegin; 7560 /* Assign global numbering to coarse dofs */ 7561 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7562 PetscInt ocoarse_size; 7563 compute_vecs = PETSC_TRUE; 7564 7565 pcbddc->new_primal_space = PETSC_TRUE; 7566 ocoarse_size = pcbddc->coarse_size; 7567 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7568 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7569 /* see if we can avoid some work */ 7570 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7571 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7572 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7573 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7574 coarse_reuse = PETSC_FALSE; 7575 } else { /* we can safely reuse already computed coarse matrix */ 7576 coarse_reuse = PETSC_TRUE; 7577 } 7578 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7579 coarse_reuse = PETSC_FALSE; 7580 } 7581 /* reset any subassembling information */ 7582 if (!coarse_reuse || pcbddc->recompute_topography) { 7583 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7584 } 7585 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7586 coarse_reuse = PETSC_TRUE; 7587 } 7588 /* assemble coarse matrix */ 7589 if (coarse_reuse && pcbddc->coarse_ksp) { 7590 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7591 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7592 coarse_mat_reuse = MAT_REUSE_MATRIX; 7593 } else { 7594 coarse_mat = NULL; 7595 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7596 } 7597 7598 /* creates temporary l2gmap and IS for coarse indexes */ 7599 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7600 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7601 7602 /* creates temporary MATIS object for coarse matrix */ 7603 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7604 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7605 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7606 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7607 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7608 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7609 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7610 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7611 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7612 7613 /* count "active" (i.e. with positive local size) and "void" processes */ 7614 im_active = !!(pcis->n); 7615 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7616 7617 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7618 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7619 /* full_restr : just use the receivers from the subassembling pattern */ 7620 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);CHKERRQ(ierr); 7621 coarse_mat_is = NULL; 7622 multilevel_allowed = PETSC_FALSE; 7623 multilevel_requested = PETSC_FALSE; 7624 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7625 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7626 if (multilevel_requested) { 7627 ncoarse = active_procs/pcbddc->coarsening_ratio; 7628 restr = PETSC_FALSE; 7629 full_restr = PETSC_FALSE; 7630 } else { 7631 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7632 restr = PETSC_TRUE; 7633 full_restr = PETSC_TRUE; 7634 } 7635 if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7636 ncoarse = PetscMax(1,ncoarse); 7637 if (!pcbddc->coarse_subassembling) { 7638 if (pcbddc->coarsening_ratio > 1) { 7639 if (multilevel_requested) { 7640 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7641 } else { 7642 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7643 } 7644 } else { 7645 PetscMPIInt rank; 7646 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7647 have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE; 7648 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7649 } 7650 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7651 PetscInt psum; 7652 if (pcbddc->coarse_ksp) psum = 1; 7653 else psum = 0; 7654 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7655 if (ncoarse < commsize) have_void = PETSC_TRUE; 7656 } 7657 /* determine if we can go multilevel */ 7658 if (multilevel_requested) { 7659 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7660 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7661 } 7662 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7663 7664 /* dump subassembling pattern */ 7665 if (pcbddc->dbg_flag && multilevel_allowed) { 7666 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7667 } 7668 7669 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7670 nedcfield = -1; 7671 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7672 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7673 const PetscInt *idxs; 7674 ISLocalToGlobalMapping tmap; 7675 7676 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7677 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7678 /* allocate space for temporary storage */ 7679 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7680 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7681 /* allocate for IS array */ 7682 nisdofs = pcbddc->n_ISForDofsLocal; 7683 if (pcbddc->nedclocal) { 7684 if (pcbddc->nedfield > -1) { 7685 nedcfield = pcbddc->nedfield; 7686 } else { 7687 nedcfield = 0; 7688 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7689 nisdofs = 1; 7690 } 7691 } 7692 nisneu = !!pcbddc->NeumannBoundariesLocal; 7693 nisvert = 0; /* nisvert is not used */ 7694 nis = nisdofs + nisneu + nisvert; 7695 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7696 /* dofs splitting */ 7697 for (i=0;i<nisdofs;i++) { 7698 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7699 if (nedcfield != i) { 7700 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7701 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7702 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7703 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7704 } else { 7705 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7706 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7707 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7708 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7709 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7710 } 7711 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7712 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7713 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7714 } 7715 /* neumann boundaries */ 7716 if (pcbddc->NeumannBoundariesLocal) { 7717 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7718 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7719 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7720 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7721 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7722 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7723 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7724 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7725 } 7726 /* free memory */ 7727 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7728 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7729 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7730 } else { 7731 nis = 0; 7732 nisdofs = 0; 7733 nisneu = 0; 7734 nisvert = 0; 7735 isarray = NULL; 7736 } 7737 /* destroy no longer needed map */ 7738 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7739 7740 /* subassemble */ 7741 if (multilevel_allowed) { 7742 Vec vp[1]; 7743 PetscInt nvecs = 0; 7744 PetscBool reuse,reuser; 7745 7746 if (coarse_mat) reuse = PETSC_TRUE; 7747 else reuse = PETSC_FALSE; 7748 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7749 vp[0] = NULL; 7750 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7751 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7752 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7753 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7754 nvecs = 1; 7755 7756 if (pcbddc->divudotp) { 7757 Mat B,loc_divudotp; 7758 Vec v,p; 7759 IS dummy; 7760 PetscInt np; 7761 7762 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7763 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7764 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7765 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7766 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7767 ierr = VecSet(p,1.);CHKERRQ(ierr); 7768 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7769 ierr = VecDestroy(&p);CHKERRQ(ierr); 7770 ierr = MatDestroy(&B);CHKERRQ(ierr); 7771 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7772 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7773 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7774 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7775 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7776 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7777 ierr = VecDestroy(&v);CHKERRQ(ierr); 7778 } 7779 } 7780 if (reuser) { 7781 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7782 } else { 7783 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7784 } 7785 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7786 PetscScalar *arraym,*arrayv; 7787 PetscInt nl; 7788 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7789 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7790 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7791 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7792 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7793 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7794 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7795 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7796 } else { 7797 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7798 } 7799 } else { 7800 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7801 } 7802 if (coarse_mat_is || coarse_mat) { 7803 PetscMPIInt size; 7804 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7805 if (!multilevel_allowed) { 7806 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7807 } else { 7808 Mat A; 7809 7810 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7811 if (coarse_mat_is) { 7812 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7813 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7814 coarse_mat = coarse_mat_is; 7815 } 7816 /* be sure we don't have MatSeqDENSE as local mat */ 7817 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7818 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7819 } 7820 } 7821 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7822 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7823 7824 /* create local to global scatters for coarse problem */ 7825 if (compute_vecs) { 7826 PetscInt lrows; 7827 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7828 if (coarse_mat) { 7829 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7830 } else { 7831 lrows = 0; 7832 } 7833 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7834 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7835 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7836 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7837 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7838 } 7839 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7840 7841 /* set defaults for coarse KSP and PC */ 7842 if (multilevel_allowed) { 7843 coarse_ksp_type = KSPRICHARDSON; 7844 coarse_pc_type = PCBDDC; 7845 } else { 7846 coarse_ksp_type = KSPPREONLY; 7847 coarse_pc_type = PCREDUNDANT; 7848 } 7849 7850 /* print some info if requested */ 7851 if (pcbddc->dbg_flag) { 7852 if (!multilevel_allowed) { 7853 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7854 if (multilevel_requested) { 7855 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7856 } else if (pcbddc->max_levels) { 7857 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7858 } 7859 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7860 } 7861 } 7862 7863 /* communicate coarse discrete gradient */ 7864 coarseG = NULL; 7865 if (pcbddc->nedcG && multilevel_allowed) { 7866 MPI_Comm ccomm; 7867 if (coarse_mat) { 7868 ccomm = PetscObjectComm((PetscObject)coarse_mat); 7869 } else { 7870 ccomm = MPI_COMM_NULL; 7871 } 7872 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 7873 } 7874 7875 /* create the coarse KSP object only once with defaults */ 7876 if (coarse_mat) { 7877 PetscBool isredundant,isnn,isbddc; 7878 PetscViewer dbg_viewer = NULL; 7879 7880 if (pcbddc->dbg_flag) { 7881 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7882 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7883 } 7884 if (!pcbddc->coarse_ksp) { 7885 char prefix[256],str_level[16]; 7886 size_t len; 7887 7888 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7889 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7890 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7891 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7892 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7893 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7894 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7895 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7896 /* TODO is this logic correct? should check for coarse_mat type */ 7897 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7898 /* prefix */ 7899 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7900 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7901 if (!pcbddc->current_level) { 7902 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7903 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7904 } else { 7905 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7906 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7907 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7908 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7909 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 7910 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7911 } 7912 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7913 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7914 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7915 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7916 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7917 /* allow user customization */ 7918 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7919 } 7920 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7921 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7922 if (nisdofs) { 7923 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7924 for (i=0;i<nisdofs;i++) { 7925 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7926 } 7927 } 7928 if (nisneu) { 7929 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7930 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7931 } 7932 if (nisvert) { 7933 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7934 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7935 } 7936 if (coarseG) { 7937 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 7938 } 7939 7940 /* get some info after set from options */ 7941 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7942 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 7943 if (isbddc && !multilevel_allowed) { 7944 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7945 isbddc = PETSC_FALSE; 7946 } 7947 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 7948 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7949 if (multilevel_requested && multilevel_allowed && !isbddc && !isnn) { 7950 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 7951 isbddc = PETSC_TRUE; 7952 } 7953 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7954 if (isredundant) { 7955 KSP inner_ksp; 7956 PC inner_pc; 7957 7958 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7959 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7960 } 7961 7962 /* parameters which miss an API */ 7963 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7964 if (isbddc) { 7965 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7966 7967 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7968 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7969 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7970 if (pcbddc_coarse->benign_saddle_point) { 7971 Mat coarsedivudotp_is; 7972 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7973 IS row,col; 7974 const PetscInt *gidxs; 7975 PetscInt n,st,M,N; 7976 7977 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7978 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7979 st = st-n; 7980 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7981 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7982 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7983 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7984 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7985 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7986 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7987 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7988 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7989 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7990 ierr = ISDestroy(&row);CHKERRQ(ierr); 7991 ierr = ISDestroy(&col);CHKERRQ(ierr); 7992 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7993 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7994 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7995 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7996 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7997 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7998 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7999 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8000 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 8001 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 8002 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8003 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8004 } 8005 } 8006 8007 /* propagate symmetry info of coarse matrix */ 8008 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 8009 if (pc->pmat->symmetric_set) { 8010 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 8011 } 8012 if (pc->pmat->hermitian_set) { 8013 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 8014 } 8015 if (pc->pmat->spd_set) { 8016 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 8017 } 8018 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8019 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 8020 } 8021 /* set operators */ 8022 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8023 if (pcbddc->dbg_flag) { 8024 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8025 } 8026 } 8027 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 8028 ierr = PetscFree(isarray);CHKERRQ(ierr); 8029 #if 0 8030 { 8031 PetscViewer viewer; 8032 char filename[256]; 8033 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8034 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 8035 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 8036 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 8037 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 8038 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 8039 } 8040 #endif 8041 8042 if (pcbddc->coarse_ksp) { 8043 Vec crhs,csol; 8044 8045 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 8046 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 8047 if (!csol) { 8048 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 8049 } 8050 if (!crhs) { 8051 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 8052 } 8053 } 8054 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8055 8056 /* compute null space for coarse solver if the benign trick has been requested */ 8057 if (pcbddc->benign_null) { 8058 8059 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 8060 for (i=0;i<pcbddc->benign_n;i++) { 8061 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 8062 } 8063 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 8064 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 8065 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8066 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8067 if (coarse_mat) { 8068 Vec nullv; 8069 PetscScalar *array,*array2; 8070 PetscInt nl; 8071 8072 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 8073 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 8074 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8075 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 8076 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 8077 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 8078 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8079 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 8080 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 8081 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 8082 } 8083 } 8084 8085 if (pcbddc->coarse_ksp) { 8086 PetscBool ispreonly; 8087 8088 if (CoarseNullSpace) { 8089 PetscBool isnull; 8090 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 8091 if (isnull) { 8092 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 8093 } 8094 /* TODO: add local nullspaces (if any) */ 8095 } 8096 /* setup coarse ksp */ 8097 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 8098 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8099 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 8100 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 8101 KSP check_ksp; 8102 KSPType check_ksp_type; 8103 PC check_pc; 8104 Vec check_vec,coarse_vec; 8105 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8106 PetscInt its; 8107 PetscBool compute_eigs; 8108 PetscReal *eigs_r,*eigs_c; 8109 PetscInt neigs; 8110 const char *prefix; 8111 8112 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8113 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 8114 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8115 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8116 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 8117 /* prevent from setup unneeded object */ 8118 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 8119 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 8120 if (ispreonly) { 8121 check_ksp_type = KSPPREONLY; 8122 compute_eigs = PETSC_FALSE; 8123 } else { 8124 check_ksp_type = KSPGMRES; 8125 compute_eigs = PETSC_TRUE; 8126 } 8127 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 8128 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 8129 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 8130 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 8131 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 8132 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 8133 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 8134 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 8135 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 8136 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 8137 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 8138 /* create random vec */ 8139 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 8140 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 8141 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8142 /* solve coarse problem */ 8143 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 8144 /* set eigenvalue estimation if preonly has not been requested */ 8145 if (compute_eigs) { 8146 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 8147 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 8148 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 8149 if (neigs) { 8150 lambda_max = eigs_r[neigs-1]; 8151 lambda_min = eigs_r[0]; 8152 if (pcbddc->use_coarse_estimates) { 8153 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8154 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8155 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8156 } 8157 } 8158 } 8159 } 8160 8161 /* check coarse problem residual error */ 8162 if (pcbddc->dbg_flag) { 8163 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8164 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8165 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8166 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8167 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8168 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8169 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8170 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8171 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8172 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8173 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8174 if (CoarseNullSpace) { 8175 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8176 } 8177 if (compute_eigs) { 8178 PetscReal lambda_max_s,lambda_min_s; 8179 KSPConvergedReason reason; 8180 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8181 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8182 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8183 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8184 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8185 for (i=0;i<neigs;i++) { 8186 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8187 } 8188 } 8189 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8190 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8191 } 8192 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8193 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8194 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8195 if (compute_eigs) { 8196 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8197 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8198 } 8199 } 8200 } 8201 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8202 /* print additional info */ 8203 if (pcbddc->dbg_flag) { 8204 /* waits until all processes reaches this point */ 8205 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8206 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 8207 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8208 } 8209 8210 /* free memory */ 8211 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8212 PetscFunctionReturn(0); 8213 } 8214 8215 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8216 { 8217 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8218 PC_IS* pcis = (PC_IS*)pc->data; 8219 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8220 IS subset,subset_mult,subset_n; 8221 PetscInt local_size,coarse_size=0; 8222 PetscInt *local_primal_indices=NULL; 8223 const PetscInt *t_local_primal_indices; 8224 PetscErrorCode ierr; 8225 8226 PetscFunctionBegin; 8227 /* Compute global number of coarse dofs */ 8228 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8229 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8230 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8231 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8232 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8233 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8234 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8235 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8236 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8237 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8238 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8239 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8240 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 8241 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8242 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8243 8244 /* check numbering */ 8245 if (pcbddc->dbg_flag) { 8246 PetscScalar coarsesum,*array,*array2; 8247 PetscInt i; 8248 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8249 8250 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8251 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8252 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8253 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8254 /* counter */ 8255 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8256 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8257 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8258 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8259 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8260 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8261 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8262 for (i=0;i<pcbddc->local_primal_size;i++) { 8263 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8264 } 8265 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8266 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8267 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8268 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8269 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8270 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8271 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8272 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8273 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8274 for (i=0;i<pcis->n;i++) { 8275 if (array[i] != 0.0 && array[i] != array2[i]) { 8276 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8277 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8278 set_error = PETSC_TRUE; 8279 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8280 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8281 } 8282 } 8283 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8284 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8285 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8286 for (i=0;i<pcis->n;i++) { 8287 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8288 } 8289 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8290 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8291 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8292 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8293 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8294 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8295 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8296 PetscInt *gidxs; 8297 8298 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8299 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8300 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8301 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8302 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8303 for (i=0;i<pcbddc->local_primal_size;i++) { 8304 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8305 } 8306 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8307 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8308 } 8309 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8310 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8311 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8312 } 8313 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 8314 /* get back data */ 8315 *coarse_size_n = coarse_size; 8316 *local_primal_indices_n = local_primal_indices; 8317 PetscFunctionReturn(0); 8318 } 8319 8320 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8321 { 8322 IS localis_t; 8323 PetscInt i,lsize,*idxs,n; 8324 PetscScalar *vals; 8325 PetscErrorCode ierr; 8326 8327 PetscFunctionBegin; 8328 /* get indices in local ordering exploiting local to global map */ 8329 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8330 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8331 for (i=0;i<lsize;i++) vals[i] = 1.0; 8332 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8333 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8334 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8335 if (idxs) { /* multilevel guard */ 8336 ierr = VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);CHKERRQ(ierr); 8337 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8338 } 8339 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8340 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8341 ierr = PetscFree(vals);CHKERRQ(ierr); 8342 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8343 /* now compute set in local ordering */ 8344 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8345 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8346 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8347 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8348 for (i=0,lsize=0;i<n;i++) { 8349 if (PetscRealPart(vals[i]) > 0.5) { 8350 lsize++; 8351 } 8352 } 8353 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8354 for (i=0,lsize=0;i<n;i++) { 8355 if (PetscRealPart(vals[i]) > 0.5) { 8356 idxs[lsize++] = i; 8357 } 8358 } 8359 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8360 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8361 *localis = localis_t; 8362 PetscFunctionReturn(0); 8363 } 8364 8365 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8366 { 8367 PC_IS *pcis=(PC_IS*)pc->data; 8368 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8369 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8370 Mat S_j; 8371 PetscInt *used_xadj,*used_adjncy; 8372 PetscBool free_used_adj; 8373 PetscErrorCode ierr; 8374 8375 PetscFunctionBegin; 8376 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8377 free_used_adj = PETSC_FALSE; 8378 if (pcbddc->sub_schurs_layers == -1) { 8379 used_xadj = NULL; 8380 used_adjncy = NULL; 8381 } else { 8382 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8383 used_xadj = pcbddc->mat_graph->xadj; 8384 used_adjncy = pcbddc->mat_graph->adjncy; 8385 } else if (pcbddc->computed_rowadj) { 8386 used_xadj = pcbddc->mat_graph->xadj; 8387 used_adjncy = pcbddc->mat_graph->adjncy; 8388 } else { 8389 PetscBool flg_row=PETSC_FALSE; 8390 const PetscInt *xadj,*adjncy; 8391 PetscInt nvtxs; 8392 8393 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8394 if (flg_row) { 8395 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8396 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8397 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8398 free_used_adj = PETSC_TRUE; 8399 } else { 8400 pcbddc->sub_schurs_layers = -1; 8401 used_xadj = NULL; 8402 used_adjncy = NULL; 8403 } 8404 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8405 } 8406 } 8407 8408 /* setup sub_schurs data */ 8409 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8410 if (!sub_schurs->schur_explicit) { 8411 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8412 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8413 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8414 } else { 8415 Mat change = NULL; 8416 Vec scaling = NULL; 8417 IS change_primal = NULL, iP; 8418 PetscInt benign_n; 8419 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8420 PetscBool isseqaij,need_change = PETSC_FALSE; 8421 PetscBool discrete_harmonic = PETSC_FALSE; 8422 8423 if (!pcbddc->use_vertices && reuse_solvers) { 8424 PetscInt n_vertices; 8425 8426 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8427 reuse_solvers = (PetscBool)!n_vertices; 8428 } 8429 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8430 if (!isseqaij) { 8431 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8432 if (matis->A == pcbddc->local_mat) { 8433 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8434 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8435 } else { 8436 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8437 } 8438 } 8439 if (!pcbddc->benign_change_explicit) { 8440 benign_n = pcbddc->benign_n; 8441 } else { 8442 benign_n = 0; 8443 } 8444 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8445 We need a global reduction to avoid possible deadlocks. 8446 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8447 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8448 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8449 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8450 need_change = (PetscBool)(!need_change); 8451 } 8452 /* If the user defines additional constraints, we import them here. 8453 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8454 if (need_change) { 8455 PC_IS *pcisf; 8456 PC_BDDC *pcbddcf; 8457 PC pcf; 8458 8459 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8460 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8461 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8462 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8463 8464 /* hacks */ 8465 pcisf = (PC_IS*)pcf->data; 8466 pcisf->is_B_local = pcis->is_B_local; 8467 pcisf->vec1_N = pcis->vec1_N; 8468 pcisf->BtoNmap = pcis->BtoNmap; 8469 pcisf->n = pcis->n; 8470 pcisf->n_B = pcis->n_B; 8471 pcbddcf = (PC_BDDC*)pcf->data; 8472 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8473 pcbddcf->mat_graph = pcbddc->mat_graph; 8474 pcbddcf->use_faces = PETSC_TRUE; 8475 pcbddcf->use_change_of_basis = PETSC_TRUE; 8476 pcbddcf->use_change_on_faces = PETSC_TRUE; 8477 pcbddcf->use_qr_single = PETSC_TRUE; 8478 pcbddcf->fake_change = PETSC_TRUE; 8479 8480 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 8481 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8482 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8483 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8484 change = pcbddcf->ConstraintMatrix; 8485 pcbddcf->ConstraintMatrix = NULL; 8486 8487 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8488 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8489 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8490 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8491 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8492 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8493 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8494 pcf->ops->destroy = NULL; 8495 pcf->ops->reset = NULL; 8496 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8497 } 8498 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8499 8500 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 8501 if (iP) { 8502 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 8503 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 8504 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8505 } 8506 if (discrete_harmonic) { 8507 Mat A; 8508 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 8509 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 8510 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 8511 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8512 ierr = MatDestroy(&A);CHKERRQ(ierr); 8513 } else { 8514 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8515 } 8516 ierr = MatDestroy(&change);CHKERRQ(ierr); 8517 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8518 } 8519 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8520 8521 /* free adjacency */ 8522 if (free_used_adj) { 8523 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8524 } 8525 PetscFunctionReturn(0); 8526 } 8527 8528 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8529 { 8530 PC_IS *pcis=(PC_IS*)pc->data; 8531 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8532 PCBDDCGraph graph; 8533 PetscErrorCode ierr; 8534 8535 PetscFunctionBegin; 8536 /* attach interface graph for determining subsets */ 8537 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8538 IS verticesIS,verticescomm; 8539 PetscInt vsize,*idxs; 8540 8541 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8542 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8543 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8544 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8545 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8546 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8547 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8548 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8549 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8550 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8551 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8552 } else { 8553 graph = pcbddc->mat_graph; 8554 } 8555 /* print some info */ 8556 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8557 IS vertices; 8558 PetscInt nv,nedges,nfaces; 8559 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8560 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8561 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8562 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8563 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8564 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8565 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8566 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8567 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8568 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8569 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8570 } 8571 8572 /* sub_schurs init */ 8573 if (!pcbddc->sub_schurs) { 8574 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8575 } 8576 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8577 pcbddc->sub_schurs->prefix = ((PetscObject)pc)->prefix; 8578 8579 /* free graph struct */ 8580 if (pcbddc->sub_schurs_rebuild) { 8581 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8582 } 8583 PetscFunctionReturn(0); 8584 } 8585 8586 PetscErrorCode PCBDDCCheckOperator(PC pc) 8587 { 8588 PC_IS *pcis=(PC_IS*)pc->data; 8589 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8590 PetscErrorCode ierr; 8591 8592 PetscFunctionBegin; 8593 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8594 IS zerodiag = NULL; 8595 Mat S_j,B0_B=NULL; 8596 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8597 PetscScalar *p0_check,*array,*array2; 8598 PetscReal norm; 8599 PetscInt i; 8600 8601 /* B0 and B0_B */ 8602 if (zerodiag) { 8603 IS dummy; 8604 8605 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8606 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8607 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8608 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8609 } 8610 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8611 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8612 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8613 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8614 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8615 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8616 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8617 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8618 /* S_j */ 8619 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8620 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8621 8622 /* mimic vector in \widetilde{W}_\Gamma */ 8623 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8624 /* continuous in primal space */ 8625 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8626 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8627 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8628 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8629 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8630 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8631 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8632 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8633 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8634 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8635 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8636 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8637 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8638 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8639 8640 /* assemble rhs for coarse problem */ 8641 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8642 /* local with Schur */ 8643 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8644 if (zerodiag) { 8645 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8646 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8647 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8648 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8649 } 8650 /* sum on primal nodes the local contributions */ 8651 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8652 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8653 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8654 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8655 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8656 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8657 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8658 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8659 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8660 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8661 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8662 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8663 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8664 /* scale primal nodes (BDDC sums contibutions) */ 8665 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8666 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8667 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8668 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8669 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8670 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8671 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8672 /* global: \widetilde{B0}_B w_\Gamma */ 8673 if (zerodiag) { 8674 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8675 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8676 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8677 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8678 } 8679 /* BDDC */ 8680 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8681 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8682 8683 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8684 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8685 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8686 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8687 for (i=0;i<pcbddc->benign_n;i++) { 8688 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8689 } 8690 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8691 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8692 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8693 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8694 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8695 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8696 } 8697 PetscFunctionReturn(0); 8698 } 8699 8700 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8701 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8702 { 8703 Mat At; 8704 IS rows; 8705 PetscInt rst,ren; 8706 PetscErrorCode ierr; 8707 PetscLayout rmap; 8708 8709 PetscFunctionBegin; 8710 rst = ren = 0; 8711 if (ccomm != MPI_COMM_NULL) { 8712 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8713 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8714 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8715 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8716 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8717 } 8718 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8719 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8720 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8721 8722 if (ccomm != MPI_COMM_NULL) { 8723 Mat_MPIAIJ *a,*b; 8724 IS from,to; 8725 Vec gvec; 8726 PetscInt lsize; 8727 8728 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8729 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8730 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8731 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8732 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8733 a = (Mat_MPIAIJ*)At->data; 8734 b = (Mat_MPIAIJ*)(*B)->data; 8735 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8736 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8737 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8738 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8739 b->A = a->A; 8740 b->B = a->B; 8741 8742 b->donotstash = a->donotstash; 8743 b->roworiented = a->roworiented; 8744 b->rowindices = 0; 8745 b->rowvalues = 0; 8746 b->getrowactive = PETSC_FALSE; 8747 8748 (*B)->rmap = rmap; 8749 (*B)->factortype = A->factortype; 8750 (*B)->assembled = PETSC_TRUE; 8751 (*B)->insertmode = NOT_SET_VALUES; 8752 (*B)->preallocated = PETSC_TRUE; 8753 8754 if (a->colmap) { 8755 #if defined(PETSC_USE_CTABLE) 8756 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8757 #else 8758 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8759 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8760 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8761 #endif 8762 } else b->colmap = 0; 8763 if (a->garray) { 8764 PetscInt len; 8765 len = a->B->cmap->n; 8766 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8767 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8768 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8769 } else b->garray = 0; 8770 8771 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8772 b->lvec = a->lvec; 8773 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8774 8775 /* cannot use VecScatterCopy */ 8776 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8777 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8778 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8779 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8780 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8781 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8782 ierr = ISDestroy(&from);CHKERRQ(ierr); 8783 ierr = ISDestroy(&to);CHKERRQ(ierr); 8784 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8785 } 8786 ierr = MatDestroy(&At);CHKERRQ(ierr); 8787 PetscFunctionReturn(0); 8788 } 8789