1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 8 9 /* if range is true, it returns B s.t. span{B} = range(A) 10 if range is false, it returns B s.t. range(B) _|_ range(A) */ 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatDenseOrthogonalRangeOrComplement" 13 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 14 { 15 #if !defined(PETSC_USE_COMPLEX) 16 PetscScalar *uwork,*data,*U, ds = 0.; 17 PetscReal *sing; 18 PetscBLASInt bM,bN,lwork,lierr,di = 1; 19 PetscInt ulw,i,nr,nc,n; 20 PetscErrorCode ierr; 21 22 PetscFunctionBegin; 23 #if defined(PETSC_MISSING_LAPACK_GESVD) 24 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 25 #else 26 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 27 if (!nr || !nc) PetscFunctionReturn(0); 28 29 /* workspace */ 30 if (!work) { 31 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 32 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 33 } else { 34 ulw = lw; 35 uwork = work; 36 } 37 n = PetscMin(nr,nc); 38 if (!rwork) { 39 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 40 } else { 41 sing = rwork; 42 } 43 44 /* SVD */ 45 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 46 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 47 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 49 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 50 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 51 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 52 ierr = PetscFPTrapPop();CHKERRQ(ierr); 53 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 54 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 55 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 56 if (!rwork) { 57 ierr = PetscFree(sing);CHKERRQ(ierr); 58 } 59 if (!work) { 60 ierr = PetscFree(uwork);CHKERRQ(ierr); 61 } 62 /* create B */ 63 if (!range) { 64 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 65 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 66 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 67 } else { 68 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 69 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 70 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 71 } 72 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 73 ierr = PetscFree(U);CHKERRQ(ierr); 74 #endif 75 #else /* PETSC_USE_COMPLEX */ 76 PetscFunctionBegin; 77 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 78 #endif 79 PetscFunctionReturn(0); 80 } 81 82 /* TODO REMOVE */ 83 #if defined(PRINT_GDET) 84 static int inc = 0; 85 static int lev = 0; 86 #endif 87 88 #undef __FUNCT__ 89 #define __FUNCT__ "PCBDDCComputeNedelecChangeEdge" 90 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 91 { 92 PetscErrorCode ierr; 93 Mat GE,GEd; 94 PetscInt rsize,csize,esize; 95 PetscScalar *ptr; 96 97 PetscFunctionBegin; 98 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 99 if (!esize) PetscFunctionReturn(0); 100 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 101 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 102 103 /* gradients */ 104 ptr = work + 5*esize; 105 ierr = MatGetSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 107 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 108 ierr = MatDestroy(&GE);CHKERRQ(ierr); 109 110 /* constants */ 111 ptr += rsize*csize; 112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 113 ierr = MatGetSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 114 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 115 ierr = MatDestroy(&GE);CHKERRQ(ierr); 116 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 117 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 118 119 if (corners) { 120 Mat GEc; 121 PetscScalar *vals,v; 122 123 ierr = MatGetSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 124 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 125 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 126 /* v = PetscAbsScalar(vals[0]) */; 127 v = 1.; 128 cvals[0] = vals[0]/v; 129 cvals[1] = vals[1]/v; 130 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 131 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 132 #if defined(PRINT_GDET) 133 { 134 PetscViewer viewer; 135 char filename[256]; 136 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 137 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 138 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 139 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 140 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 141 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 142 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 143 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 144 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 145 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 146 } 147 #endif 148 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 149 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 150 } 151 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "PCBDDCNedelecSupport" 157 PetscErrorCode PCBDDCNedelecSupport(PC pc) 158 { 159 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 160 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 161 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 162 Vec tvec; 163 PetscSF sfv; 164 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 165 MPI_Comm comm; 166 IS lned,primals,allprimals,nedfieldlocal; 167 IS *eedges,*extrows,*extcols,*alleedges; 168 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 169 PetscScalar *vals,*work; 170 PetscReal *rwork; 171 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 172 PetscInt ne,nv,Lv,order,n,field; 173 PetscInt n_neigh,*neigh,*n_shared,**shared; 174 PetscInt i,j,extmem,cum,maxsize,nee; 175 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 176 PetscInt *sfvleaves,*sfvroots; 177 PetscInt *corners,*cedges; 178 PetscInt *ecount,**eneighs,*vcount,**vneighs; 179 #if defined(PETSC_USE_DEBUG) 180 PetscInt *emarks; 181 #endif 182 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 183 PetscErrorCode ierr; 184 185 PetscFunctionBegin; 186 /* If the discrete gradient is defined for a subset of dofs and global is true, 187 it assumes G is given in global ordering for all the dofs. 188 Otherwise, the ordering is global for the Nedelec field */ 189 order = pcbddc->nedorder; 190 conforming = pcbddc->conforming; 191 field = pcbddc->nedfield; 192 global = pcbddc->nedglobal; 193 setprimal = PETSC_FALSE; 194 print = PETSC_FALSE; 195 singular = PETSC_FALSE; 196 197 /* Command line customization */ 198 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 199 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 200 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 201 ierr = PetscOptionsInt ("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 202 /* print debug info TODO: to be removed */ 203 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 204 ierr = PetscOptionsEnd();CHKERRQ(ierr); 205 206 /* Return if there are no edges in the decomposition and the problem is not singular */ 207 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 208 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 209 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 210 if (!singular) { 211 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 212 lrc[0] = PETSC_FALSE; 213 for (i=0;i<n;i++) { 214 if (PetscRealPart(vals[i]) > 2.) { 215 lrc[0] = PETSC_TRUE; 216 break; 217 } 218 } 219 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 220 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 221 if (!lrc[1]) PetscFunctionReturn(0); 222 } 223 224 /* Get Nedelec field */ 225 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 226 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 227 if (pcbddc->n_ISForDofsLocal && field >= 0) { 228 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 229 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 230 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 231 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 232 ne = n; 233 nedfieldlocal = NULL; 234 global = PETSC_TRUE; 235 } else if (field == PETSC_DECIDE) { 236 PetscInt rst,ren,*idx; 237 238 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 239 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 240 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 241 for (i=rst;i<ren;i++) { 242 PetscInt nc; 243 244 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 245 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 246 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 247 } 248 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 249 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 250 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 251 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 252 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 253 } else { 254 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 255 } 256 257 /* Sanity checks */ 258 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 259 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 260 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 261 262 /* Just set primal dofs and return */ 263 if (setprimal) { 264 IS enedfieldlocal; 265 PetscInt *eidxs; 266 267 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 268 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 269 if (nedfieldlocal) { 270 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 271 for (i=0,cum=0;i<ne;i++) { 272 if (PetscRealPart(vals[idxs[i]]) > 2.) { 273 eidxs[cum++] = idxs[i]; 274 } 275 } 276 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 277 } else { 278 for (i=0,cum=0;i<ne;i++) { 279 if (PetscRealPart(vals[i]) > 2.) { 280 eidxs[cum++] = i; 281 } 282 } 283 } 284 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 285 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 286 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 287 ierr = PetscFree(eidxs);CHKERRQ(ierr); 288 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 289 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 290 PetscFunctionReturn(0); 291 } 292 293 /* Compute some l2g maps */ 294 if (nedfieldlocal) { 295 IS is; 296 297 /* need to map from the local Nedelec field to local numbering */ 298 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 299 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 300 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 301 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 302 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 303 if (global) { 304 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 305 el2g = al2g; 306 } else { 307 IS gis; 308 309 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 310 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 311 ierr = ISDestroy(&gis);CHKERRQ(ierr); 312 } 313 ierr = ISDestroy(&is);CHKERRQ(ierr); 314 } else { 315 /* restore default */ 316 pcbddc->nedfield = -1; 317 /* one ref for the destruction of al2g, one for el2g */ 318 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 319 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 320 el2g = al2g; 321 fl2g = NULL; 322 } 323 324 /* Start communication to drop connections for interior edges (for cc analysis only) */ 325 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 326 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 327 if (nedfieldlocal) { 328 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 329 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 330 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 331 } else { 332 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 333 } 334 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 335 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 336 337 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 338 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 339 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 340 if (global) { 341 PetscInt rst; 342 343 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 344 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 345 if (matis->sf_rootdata[i] < 2) { 346 matis->sf_rootdata[cum++] = i + rst; 347 } 348 } 349 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 350 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 351 } else { 352 PetscInt *tbz; 353 354 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 355 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 356 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 357 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 358 for (i=0,cum=0;i<ne;i++) 359 if (matis->sf_leafdata[idxs[i]] == 1) 360 tbz[cum++] = i; 361 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 362 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 363 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 364 ierr = PetscFree(tbz);CHKERRQ(ierr); 365 } 366 } else { /* we need the entire G to infer the nullspace */ 367 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 368 G = pcbddc->discretegradient; 369 } 370 371 /* Extract subdomain relevant rows of G */ 372 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 373 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 374 ierr = MatGetSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 375 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 376 ierr = ISDestroy(&lned);CHKERRQ(ierr); 377 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 378 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 379 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 380 381 /* SF for nodal dofs communications */ 382 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 383 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 384 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 385 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 386 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 387 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 388 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 389 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 390 i = singular ? 2 : 1; 391 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 392 393 /* Destroy temporary G created in MATIS format and modified G */ 394 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 395 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 396 ierr = MatDestroy(&G);CHKERRQ(ierr); 397 398 if (print) { 399 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 400 ierr = MatView(lG,NULL);CHKERRQ(ierr); 401 } 402 403 /* Save lG for values insertion in change of basis */ 404 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 405 406 /* Analyze the edge-nodes connections (duplicate lG) */ 407 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 408 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 409 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 410 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 411 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 412 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 413 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 414 /* need to import the boundary specification to ensure the 415 proper detection of coarse edges' endpoints */ 416 if (pcbddc->DirichletBoundariesLocal) { 417 IS is; 418 419 if (fl2g) { 420 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 421 } else { 422 is = pcbddc->DirichletBoundariesLocal; 423 } 424 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 425 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 426 for (i=0;i<cum;i++) { 427 if (idxs[i] >= 0) { 428 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 429 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 430 } 431 } 432 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 433 if (fl2g) { 434 ierr = ISDestroy(&is);CHKERRQ(ierr); 435 } 436 } 437 if (pcbddc->NeumannBoundariesLocal) { 438 IS is; 439 440 if (fl2g) { 441 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 442 } else { 443 is = pcbddc->NeumannBoundariesLocal; 444 } 445 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 446 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 447 for (i=0;i<cum;i++) { 448 if (idxs[i] >= 0) { 449 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 450 } 451 } 452 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 453 if (fl2g) { 454 ierr = ISDestroy(&is);CHKERRQ(ierr); 455 } 456 } 457 458 /* Count neighs per dof */ 459 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 460 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 461 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 462 for (i=1,cum=0;i<n_neigh;i++) { 463 cum += n_shared[i]; 464 for (j=0;j<n_shared[i];j++) { 465 ecount[shared[i][j]]++; 466 } 467 } 468 if (ne) { 469 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 470 } 471 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 472 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 473 for (i=1;i<n_neigh;i++) { 474 for (j=0;j<n_shared[i];j++) { 475 PetscInt k = shared[i][j]; 476 eneighs[k][ecount[k]] = neigh[i]; 477 ecount[k]++; 478 } 479 } 480 for (i=0;i<ne;i++) { 481 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 482 } 483 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 484 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 485 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 486 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 487 for (i=1,cum=0;i<n_neigh;i++) { 488 cum += n_shared[i]; 489 for (j=0;j<n_shared[i];j++) { 490 vcount[shared[i][j]]++; 491 } 492 } 493 if (nv) { 494 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 495 } 496 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 497 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 498 for (i=1;i<n_neigh;i++) { 499 for (j=0;j<n_shared[i];j++) { 500 PetscInt k = shared[i][j]; 501 vneighs[k][vcount[k]] = neigh[i]; 502 vcount[k]++; 503 } 504 } 505 for (i=0;i<nv;i++) { 506 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 507 } 508 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 509 510 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 511 for proper detection of coarse edges' endpoints */ 512 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 513 for (i=0;i<ne;i++) { 514 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 515 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 516 } 517 } 518 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 519 if (!conforming) { 520 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 521 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 522 } 523 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 524 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 525 cum = 0; 526 for (i=0;i<ne;i++) { 527 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 528 if (!PetscBTLookup(btee,i)) { 529 marks[cum++] = i; 530 continue; 531 } 532 /* set badly connected edge dofs as primal */ 533 if (!conforming) { 534 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 535 marks[cum++] = i; 536 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 537 for (j=ii[i];j<ii[i+1];j++) { 538 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 539 } 540 } else { 541 /* every edge dofs should be connected trough a certain number of nodal dofs 542 to other edge dofs belonging to coarse edges 543 - at most 2 endpoints 544 - order-1 interior nodal dofs 545 - no undefined nodal dofs (nconn < order) 546 */ 547 PetscInt ends = 0,ints = 0, undef = 0; 548 for (j=ii[i];j<ii[i+1];j++) { 549 PetscInt v = jj[j],k; 550 PetscInt nconn = iit[v+1]-iit[v]; 551 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 552 if (nconn > order) ends++; 553 else if (nconn == order) ints++; 554 else undef++; 555 } 556 if (undef || ends > 2 || ints != order -1) { 557 marks[cum++] = i; 558 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 559 for (j=ii[i];j<ii[i+1];j++) { 560 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 561 } 562 } 563 } 564 } 565 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 566 if (!order && ii[i+1] != ii[i]) { 567 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 568 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 569 } 570 } 571 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 572 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 573 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 574 if (!conforming) { 575 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 576 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 577 } 578 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 579 580 /* identify splitpoints and corner candidates */ 581 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 582 if (print) { 583 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 584 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 585 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 586 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 587 } 588 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 589 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 590 for (i=0;i<nv;i++) { 591 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 592 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 593 if (!order) { /* variable order */ 594 PetscReal vorder = 0.; 595 596 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 597 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 598 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 599 ord = 1; 600 } 601 #if defined(PETSC_USE_DEBUG) 602 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 603 #endif 604 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 605 if (PetscBTLookup(btbd,jj[j])) { 606 bdir = PETSC_TRUE; 607 break; 608 } 609 if (vc != ecount[jj[j]]) { 610 sneighs = PETSC_FALSE; 611 } else { 612 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 613 for (k=0;k<vc;k++) { 614 if (vn[k] != en[k]) { 615 sneighs = PETSC_FALSE; 616 break; 617 } 618 } 619 } 620 } 621 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 622 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 623 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 624 } else if (test == ord) { 625 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 626 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 627 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 628 } else { 629 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 630 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 631 } 632 } 633 } 634 ierr = PetscFree(ecount);CHKERRQ(ierr); 635 ierr = PetscFree(vcount);CHKERRQ(ierr); 636 if (ne) { 637 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 638 } 639 if (nv) { 640 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 641 } 642 ierr = PetscFree(eneighs);CHKERRQ(ierr); 643 ierr = PetscFree(vneighs);CHKERRQ(ierr); 644 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 645 646 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 647 if (order != 1) { 648 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 649 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 650 for (i=0;i<nv;i++) { 651 if (PetscBTLookup(btvcand,i)) { 652 PetscBool found = PETSC_FALSE; 653 for (j=ii[i];j<ii[i+1] && !found;j++) { 654 PetscInt k,e = jj[j]; 655 if (PetscBTLookup(bte,e)) continue; 656 for (k=iit[e];k<iit[e+1];k++) { 657 PetscInt v = jjt[k]; 658 if (v != i && PetscBTLookup(btvcand,v)) { 659 found = PETSC_TRUE; 660 break; 661 } 662 } 663 } 664 if (!found) { 665 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 666 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 667 } else { 668 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 669 } 670 } 671 } 672 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 673 } 674 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 675 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 676 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 677 678 /* Get the local G^T explicitly */ 679 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 680 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 681 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 682 683 /* Mark interior nodal dofs */ 684 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 685 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 686 for (i=1;i<n_neigh;i++) { 687 for (j=0;j<n_shared[i];j++) { 688 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 689 } 690 } 691 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 692 693 /* communicate corners and splitpoints */ 694 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 695 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 696 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 697 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 698 699 if (print) { 700 IS tbz; 701 702 cum = 0; 703 for (i=0;i<nv;i++) 704 if (sfvleaves[i]) 705 vmarks[cum++] = i; 706 707 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 708 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 709 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 710 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 711 } 712 713 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 714 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 715 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 716 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 717 718 /* Zero rows of lGt corresponding to identified corners 719 and interior nodal dofs */ 720 cum = 0; 721 for (i=0;i<nv;i++) { 722 if (sfvleaves[i]) { 723 vmarks[cum++] = i; 724 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 725 } 726 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 727 } 728 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 729 if (print) { 730 IS tbz; 731 732 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 733 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 734 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 735 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 736 } 737 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 738 ierr = PetscFree(vmarks);CHKERRQ(ierr); 739 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 740 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 741 742 /* Recompute G */ 743 ierr = MatDestroy(&lG);CHKERRQ(ierr); 744 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 745 if (print) { 746 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 747 ierr = MatView(lG,NULL);CHKERRQ(ierr); 748 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 749 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 750 } 751 752 /* Get primal dofs (if any) */ 753 cum = 0; 754 for (i=0;i<ne;i++) { 755 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 756 } 757 if (fl2g) { 758 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 759 } 760 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 761 if (print) { 762 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 763 ierr = ISView(primals,NULL);CHKERRQ(ierr); 764 } 765 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 766 /* TODO: what if the user passed in some of them ? */ 767 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 768 ierr = ISDestroy(&primals);CHKERRQ(ierr); 769 770 /* Compute edge connectivity */ 771 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 772 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 773 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 774 if (fl2g) { 775 PetscBT btf; 776 PetscInt *iia,*jja,*iiu,*jju; 777 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 778 779 /* create CSR for all local dofs */ 780 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 781 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 782 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 783 iiu = pcbddc->mat_graph->xadj; 784 jju = pcbddc->mat_graph->adjncy; 785 } else if (pcbddc->use_local_adj) { 786 rest = PETSC_TRUE; 787 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 788 } else { 789 free = PETSC_TRUE; 790 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 791 iiu[0] = 0; 792 for (i=0;i<n;i++) { 793 iiu[i+1] = i+1; 794 jju[i] = -1; 795 } 796 } 797 798 /* import sizes of CSR */ 799 iia[0] = 0; 800 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 801 802 /* overwrite entries corresponding to the Nedelec field */ 803 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 804 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 805 for (i=0;i<ne;i++) { 806 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 807 iia[idxs[i]+1] = ii[i+1]-ii[i]; 808 } 809 810 /* iia in CSR */ 811 for (i=0;i<n;i++) iia[i+1] += iia[i]; 812 813 /* jja in CSR */ 814 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 815 for (i=0;i<n;i++) 816 if (!PetscBTLookup(btf,i)) 817 for (j=0;j<iiu[i+1]-iiu[i];j++) 818 jja[iia[i]+j] = jju[iiu[i]+j]; 819 820 /* map edge dofs connectivity */ 821 if (jj) { 822 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 823 for (i=0;i<ne;i++) { 824 PetscInt e = idxs[i]; 825 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 826 } 827 } 828 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 829 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 830 if (rest) { 831 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 832 } 833 if (free) { 834 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 835 } 836 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 837 } else { 838 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 839 } 840 841 /* Analyze interface for edge dofs */ 842 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 843 pcbddc->mat_graph->twodim = PETSC_FALSE; 844 845 /* Get coarse edges in the edge space */ 846 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 847 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 848 849 if (fl2g) { 850 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 851 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 852 for (i=0;i<nee;i++) { 853 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 854 } 855 } else { 856 eedges = alleedges; 857 primals = allprimals; 858 } 859 860 /* Mark fine edge dofs with their coarse edge id */ 861 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 862 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 863 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 864 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 865 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 866 if (print) { 867 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 868 ierr = ISView(primals,NULL);CHKERRQ(ierr); 869 } 870 871 maxsize = 0; 872 for (i=0;i<nee;i++) { 873 PetscInt size,mark = i+1; 874 875 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 876 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 877 for (j=0;j<size;j++) marks[idxs[j]] = mark; 878 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 879 maxsize = PetscMax(maxsize,size); 880 } 881 882 /* Find coarse edge endpoints */ 883 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 884 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 885 for (i=0;i<nee;i++) { 886 PetscInt mark = i+1,size; 887 888 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 889 if (!size && nedfieldlocal) continue; 890 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 891 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 892 if (print) { 893 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 894 ISView(eedges[i],NULL); 895 } 896 for (j=0;j<size;j++) { 897 PetscInt k, ee = idxs[j]; 898 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 899 for (k=ii[ee];k<ii[ee+1];k++) { 900 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 901 if (PetscBTLookup(btv,jj[k])) { 902 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 903 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 904 PetscInt k2; 905 PetscBool corner = PETSC_FALSE; 906 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 907 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 908 /* it's a corner if either is connected with an edge dof belonging to a different cc or 909 if the edge dof lie on the natural part of the boundary */ 910 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 911 corner = PETSC_TRUE; 912 break; 913 } 914 } 915 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 916 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 917 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 918 } else { 919 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 920 } 921 } 922 } 923 } 924 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 925 } 926 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 927 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 928 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 929 930 /* Reset marked primal dofs */ 931 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 932 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 933 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 934 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 935 936 /* Now use the initial lG */ 937 ierr = MatDestroy(&lG);CHKERRQ(ierr); 938 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 939 lG = lGinit; 940 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 941 942 /* Compute extended cols indices */ 943 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 944 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 945 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 946 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 947 i *= maxsize; 948 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 949 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 950 eerr = PETSC_FALSE; 951 for (i=0;i<nee;i++) { 952 PetscInt size,found = 0; 953 954 cum = 0; 955 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 956 if (!size && nedfieldlocal) continue; 957 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 958 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 959 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 960 for (j=0;j<size;j++) { 961 PetscInt k,ee = idxs[j]; 962 for (k=ii[ee];k<ii[ee+1];k++) { 963 PetscInt vv = jj[k]; 964 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 965 else if (!PetscBTLookupSet(btvc,vv)) found++; 966 } 967 } 968 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 969 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 970 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 971 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 972 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 973 /* it may happen that endpoints are not defined at this point 974 if it is the case, mark this edge for a second pass */ 975 if (cum != size -1 || found != 2) { 976 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 977 if (print) { 978 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 979 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 980 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 981 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 982 } 983 eerr = PETSC_TRUE; 984 } 985 } 986 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 987 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 988 if (done) { 989 PetscInt *newprimals; 990 991 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 992 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 993 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 994 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 995 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 996 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 997 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 998 for (i=0;i<nee;i++) { 999 PetscBool has_candidates = PETSC_FALSE; 1000 if (PetscBTLookup(bter,i)) { 1001 PetscInt size,mark = i+1; 1002 1003 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1004 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1005 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1006 for (j=0;j<size;j++) { 1007 PetscInt k,ee = idxs[j]; 1008 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 1009 for (k=ii[ee];k<ii[ee+1];k++) { 1010 /* set all candidates located on the edge as corners */ 1011 if (PetscBTLookup(btvcand,jj[k])) { 1012 PetscInt k2,vv = jj[k]; 1013 has_candidates = PETSC_TRUE; 1014 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 1015 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 1016 /* set all edge dofs connected to candidate as primals */ 1017 for (k2=iit[vv];k2<iit[vv+1];k2++) { 1018 if (marks[jjt[k2]] == mark) { 1019 PetscInt k3,ee2 = jjt[k2]; 1020 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 1021 newprimals[cum++] = ee2; 1022 /* finally set the new corners */ 1023 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 1024 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1025 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1026 } 1027 } 1028 } 1029 } else { 1030 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1031 } 1032 } 1033 } 1034 if (!has_candidates) { /* circular edge */ 1035 PetscInt k, ee = idxs[0],*tmarks; 1036 1037 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1038 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1039 for (k=ii[ee];k<ii[ee+1];k++) { 1040 PetscInt k2; 1041 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1042 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1043 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1044 } 1045 for (j=0;j<size;j++) { 1046 if (tmarks[idxs[j]] > 1) { 1047 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1048 newprimals[cum++] = idxs[j]; 1049 } 1050 } 1051 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1052 } 1053 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1054 } 1055 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1056 } 1057 ierr = PetscFree(extcols);CHKERRQ(ierr); 1058 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1059 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1060 if (fl2g) { 1061 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1062 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1063 for (i=0;i<nee;i++) { 1064 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1065 } 1066 ierr = PetscFree(eedges);CHKERRQ(ierr); 1067 } 1068 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1069 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1070 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1071 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1072 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1073 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1074 pcbddc->mat_graph->twodim = PETSC_FALSE; 1075 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1076 if (fl2g) { 1077 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1078 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1079 for (i=0;i<nee;i++) { 1080 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1081 } 1082 } else { 1083 eedges = alleedges; 1084 primals = allprimals; 1085 } 1086 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1087 1088 /* Mark again */ 1089 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1090 for (i=0;i<nee;i++) { 1091 PetscInt size,mark = i+1; 1092 1093 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1094 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1095 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1096 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1097 } 1098 if (print) { 1099 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1100 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1101 } 1102 1103 /* Recompute extended cols */ 1104 eerr = PETSC_FALSE; 1105 for (i=0;i<nee;i++) { 1106 PetscInt size; 1107 1108 cum = 0; 1109 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1110 if (!size && nedfieldlocal) continue; 1111 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1112 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1113 for (j=0;j<size;j++) { 1114 PetscInt k,ee = idxs[j]; 1115 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1116 } 1117 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1118 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1119 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1120 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1121 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1122 if (cum != size -1) { 1123 if (print) { 1124 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1125 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1126 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1127 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1128 } 1129 eerr = PETSC_TRUE; 1130 } 1131 } 1132 } 1133 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1134 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1135 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1136 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1137 /* an error should not occur at this point */ 1138 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1139 1140 /* Check the number of endpoints */ 1141 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1142 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1143 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1144 for (i=0;i<nee;i++) { 1145 PetscInt size, found = 0, gc[2]; 1146 1147 /* init with defaults */ 1148 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1149 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1150 if (!size && nedfieldlocal) continue; 1151 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1152 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1153 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1154 for (j=0;j<size;j++) { 1155 PetscInt k,ee = idxs[j]; 1156 for (k=ii[ee];k<ii[ee+1];k++) { 1157 PetscInt vv = jj[k]; 1158 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1159 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1160 corners[i*2+found++] = vv; 1161 } 1162 } 1163 } 1164 if (found != 2) { 1165 PetscInt e; 1166 if (fl2g) { 1167 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1168 } else { 1169 e = idxs[0]; 1170 } 1171 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1172 } 1173 1174 /* get primal dof index on this coarse edge */ 1175 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1176 if (gc[0] > gc[1]) { 1177 PetscInt swap = corners[2*i]; 1178 corners[2*i] = corners[2*i+1]; 1179 corners[2*i+1] = swap; 1180 } 1181 cedges[i] = idxs[size-1]; 1182 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1183 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1184 } 1185 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1186 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1187 1188 #if defined(PETSC_USE_DEBUG) 1189 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1190 not interfere with neighbouring coarse edges */ 1191 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1192 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1193 for (i=0;i<nv;i++) { 1194 PetscInt emax = 0,eemax = 0; 1195 1196 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1197 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1198 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1199 for (j=1;j<nee+1;j++) { 1200 if (emax < emarks[j]) { 1201 emax = emarks[j]; 1202 eemax = j; 1203 } 1204 } 1205 /* not relevant for edges */ 1206 if (!eemax) continue; 1207 1208 for (j=ii[i];j<ii[i+1];j++) { 1209 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1210 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1211 } 1212 } 1213 } 1214 ierr = PetscFree(emarks);CHKERRQ(ierr); 1215 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1216 #endif 1217 1218 /* Compute extended rows indices for edge blocks of the change of basis */ 1219 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1220 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1221 extmem *= maxsize; 1222 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1223 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1224 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1225 for (i=0;i<nv;i++) { 1226 PetscInt mark = 0,size,start; 1227 1228 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1229 for (j=ii[i];j<ii[i+1];j++) 1230 if (marks[jj[j]] && !mark) 1231 mark = marks[jj[j]]; 1232 1233 /* not relevant */ 1234 if (!mark) continue; 1235 1236 /* import extended row */ 1237 mark--; 1238 start = mark*extmem+extrowcum[mark]; 1239 size = ii[i+1]-ii[i]; 1240 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1241 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1242 extrowcum[mark] += size; 1243 } 1244 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1245 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1246 ierr = PetscFree(marks);CHKERRQ(ierr); 1247 1248 /* Compress extrows */ 1249 cum = 0; 1250 for (i=0;i<nee;i++) { 1251 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1252 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1253 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1254 cum = PetscMax(cum,size); 1255 } 1256 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1257 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1258 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1259 1260 /* Workspace for lapack inner calls and VecSetValues */ 1261 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1262 1263 /* Create change of basis matrix (preallocation can be improved) */ 1264 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1265 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1266 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1267 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1268 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1269 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1270 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1271 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1272 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1273 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1274 1275 /* Defaults to identity */ 1276 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1277 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1278 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1279 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1280 1281 /* Create discrete gradient for the coarser level if needed */ 1282 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1283 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1284 if (pcbddc->current_level < pcbddc->max_levels) { 1285 ISLocalToGlobalMapping cel2g,cvl2g; 1286 IS wis,gwis; 1287 PetscInt cnv,cne; 1288 1289 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1290 if (fl2g) { 1291 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1292 } else { 1293 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1294 pcbddc->nedclocal = wis; 1295 } 1296 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1297 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1298 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1299 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1300 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1301 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1302 1303 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1304 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1305 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1306 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1307 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1308 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1309 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1310 1311 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1312 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1313 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1314 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1315 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1316 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1317 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1318 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1319 } 1320 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1321 1322 #if defined(PRINT_GDET) 1323 inc = 0; 1324 lev = pcbddc->current_level; 1325 #endif 1326 1327 /* Insert values in the change of basis matrix */ 1328 for (i=0;i<nee;i++) { 1329 Mat Gins = NULL, GKins = NULL; 1330 IS cornersis = NULL; 1331 PetscScalar cvals[2]; 1332 1333 if (pcbddc->nedcG) { 1334 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1335 } 1336 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1337 if (Gins && GKins) { 1338 PetscScalar *data; 1339 const PetscInt *rows,*cols; 1340 PetscInt nrh,nch,nrc,ncc; 1341 1342 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1343 /* H1 */ 1344 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1345 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1346 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1347 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1348 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1349 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1350 /* complement */ 1351 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1352 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1353 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i); 1354 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc); 1355 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1356 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1357 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1358 1359 /* coarse discrete gradient */ 1360 if (pcbddc->nedcG) { 1361 PetscInt cols[2]; 1362 1363 cols[0] = 2*i; 1364 cols[1] = 2*i+1; 1365 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1366 } 1367 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1368 } 1369 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1370 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1371 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1372 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1373 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1374 } 1375 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1376 1377 /* Start assembling */ 1378 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1379 if (pcbddc->nedcG) { 1380 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1381 } 1382 1383 /* Free */ 1384 if (fl2g) { 1385 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1386 for (i=0;i<nee;i++) { 1387 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1388 } 1389 ierr = PetscFree(eedges);CHKERRQ(ierr); 1390 } 1391 1392 /* hack mat_graph with primal dofs on the coarse edges */ 1393 { 1394 PCBDDCGraph graph = pcbddc->mat_graph; 1395 PetscInt *oqueue = graph->queue; 1396 PetscInt *ocptr = graph->cptr; 1397 PetscInt ncc,*idxs; 1398 1399 /* find first primal edge */ 1400 if (pcbddc->nedclocal) { 1401 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1402 } else { 1403 if (fl2g) { 1404 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1405 } 1406 idxs = cedges; 1407 } 1408 cum = 0; 1409 while (cum < nee && cedges[cum] < 0) cum++; 1410 1411 /* adapt connected components */ 1412 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1413 graph->cptr[0] = 0; 1414 for (i=0,ncc=0;i<graph->ncc;i++) { 1415 PetscInt lc = ocptr[i+1]-ocptr[i]; 1416 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1417 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1418 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1419 ncc++; 1420 lc--; 1421 cum++; 1422 while (cum < nee && cedges[cum] < 0) cum++; 1423 } 1424 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1425 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1426 ncc++; 1427 } 1428 graph->ncc = ncc; 1429 if (pcbddc->nedclocal) { 1430 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1431 } 1432 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1433 } 1434 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1435 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1436 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1437 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1438 1439 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1440 ierr = PetscFree(extrow);CHKERRQ(ierr); 1441 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1442 ierr = PetscFree(corners);CHKERRQ(ierr); 1443 ierr = PetscFree(cedges);CHKERRQ(ierr); 1444 ierr = PetscFree(extrows);CHKERRQ(ierr); 1445 ierr = PetscFree(extcols);CHKERRQ(ierr); 1446 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1447 1448 /* Complete assembling */ 1449 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1450 if (pcbddc->nedcG) { 1451 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1452 #if 0 1453 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1454 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1455 #endif 1456 } 1457 1458 /* set change of basis */ 1459 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1460 ierr = MatDestroy(&T);CHKERRQ(ierr); 1461 1462 PetscFunctionReturn(0); 1463 } 1464 1465 /* the near-null space of BDDC carries information on quadrature weights, 1466 and these can be collinear -> so cheat with MatNullSpaceCreate 1467 and create a suitable set of basis vectors first */ 1468 #undef __FUNCT__ 1469 #define __FUNCT__ "PCBDDCNullSpaceCreate" 1470 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1471 { 1472 PetscErrorCode ierr; 1473 PetscInt i; 1474 1475 PetscFunctionBegin; 1476 for (i=0;i<nvecs;i++) { 1477 PetscInt first,last; 1478 1479 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1480 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1481 if (i>=first && i < last) { 1482 PetscScalar *data; 1483 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1484 if (!has_const) { 1485 data[i-first] = 1.; 1486 } else { 1487 data[2*i-first] = 1./PetscSqrtReal(2.); 1488 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1489 } 1490 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1491 } 1492 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1493 } 1494 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1495 for (i=0;i<nvecs;i++) { /* reset vectors */ 1496 PetscInt first,last; 1497 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1498 if (i>=first && i < last) { 1499 PetscScalar *data; 1500 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1501 if (!has_const) { 1502 data[i-first] = 0.; 1503 } else { 1504 data[2*i-first] = 0.; 1505 data[2*i-first+1] = 0.; 1506 } 1507 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1508 } 1509 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1510 } 1511 PetscFunctionReturn(0); 1512 } 1513 1514 #undef __FUNCT__ 1515 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 1516 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1517 { 1518 Mat loc_divudotp; 1519 Vec p,v,vins,quad_vec,*quad_vecs; 1520 ISLocalToGlobalMapping map; 1521 IS *faces,*edges; 1522 PetscScalar *vals; 1523 const PetscScalar *array; 1524 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1525 PetscMPIInt rank; 1526 PetscErrorCode ierr; 1527 1528 PetscFunctionBegin; 1529 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1530 if (graph->twodim) { 1531 lmaxneighs = 2; 1532 } else { 1533 lmaxneighs = 1; 1534 for (i=0;i<ne;i++) { 1535 const PetscInt *idxs; 1536 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1537 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1538 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1539 } 1540 lmaxneighs++; /* graph count does not include self */ 1541 } 1542 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1543 maxsize = 0; 1544 for (i=0;i<ne;i++) { 1545 PetscInt nn; 1546 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1547 maxsize = PetscMax(maxsize,nn); 1548 } 1549 for (i=0;i<nf;i++) { 1550 PetscInt nn; 1551 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1552 maxsize = PetscMax(maxsize,nn); 1553 } 1554 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1555 /* create vectors to hold quadrature weights */ 1556 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1557 if (!transpose) { 1558 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1559 } else { 1560 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1561 } 1562 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1563 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1564 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1565 for (i=0;i<maxneighs;i++) { 1566 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1567 } 1568 1569 /* compute local quad vec */ 1570 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1571 if (!transpose) { 1572 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1573 } else { 1574 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1575 } 1576 ierr = VecSet(p,1.);CHKERRQ(ierr); 1577 if (!transpose) { 1578 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1579 } else { 1580 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1581 } 1582 if (vl2l) { 1583 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1584 } else { 1585 vins = v; 1586 } 1587 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1588 ierr = VecDestroy(&p);CHKERRQ(ierr); 1589 1590 /* insert in global quadrature vecs */ 1591 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1592 for (i=0;i<nf;i++) { 1593 const PetscInt *idxs; 1594 PetscInt idx,nn,j; 1595 1596 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1597 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1598 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1599 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1600 idx = -(idx+1); 1601 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1602 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1603 } 1604 for (i=0;i<ne;i++) { 1605 const PetscInt *idxs; 1606 PetscInt idx,nn,j; 1607 1608 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1609 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1610 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1611 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1612 idx = -(idx+1); 1613 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1614 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1615 } 1616 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1617 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1618 if (vl2l) { 1619 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1620 } 1621 ierr = VecDestroy(&v);CHKERRQ(ierr); 1622 ierr = PetscFree(vals);CHKERRQ(ierr); 1623 1624 /* assemble near null space */ 1625 for (i=0;i<maxneighs;i++) { 1626 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1627 } 1628 for (i=0;i<maxneighs;i++) { 1629 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1630 } 1631 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1632 PetscFunctionReturn(0); 1633 } 1634 1635 1636 #undef __FUNCT__ 1637 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 1638 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1639 { 1640 PetscErrorCode ierr; 1641 Vec local,global; 1642 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1643 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1644 1645 PetscFunctionBegin; 1646 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1647 /* need to convert from global to local topology information and remove references to information in global ordering */ 1648 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1649 if (pcbddc->user_provided_isfordofs) { 1650 if (pcbddc->n_ISForDofs) { 1651 PetscInt i; 1652 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1653 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1654 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1655 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1656 } 1657 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1658 pcbddc->n_ISForDofs = 0; 1659 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1660 } 1661 } else { 1662 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1663 PetscInt i, n = matis->A->rmap->n; 1664 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1665 if (i > 1) { 1666 pcbddc->n_ISForDofsLocal = i; 1667 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1668 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1669 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1670 } 1671 } 1672 } 1673 } 1674 1675 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1676 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1677 } 1678 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1679 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1680 } 1681 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1682 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1683 } 1684 ierr = VecDestroy(&global);CHKERRQ(ierr); 1685 ierr = VecDestroy(&local);CHKERRQ(ierr); 1686 PetscFunctionReturn(0); 1687 } 1688 1689 #undef __FUNCT__ 1690 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 1691 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1692 { 1693 PC_IS *pcis = (PC_IS*)(pc->data); 1694 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1695 PetscErrorCode ierr; 1696 1697 PetscFunctionBegin; 1698 if (!pcbddc->benign_have_null) { 1699 PetscFunctionReturn(0); 1700 } 1701 if (pcbddc->ChangeOfBasisMatrix) { 1702 Vec swap; 1703 1704 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1705 swap = pcbddc->work_change; 1706 pcbddc->work_change = r; 1707 r = swap; 1708 } 1709 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1710 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1711 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1712 ierr = VecSet(z,0.);CHKERRQ(ierr); 1713 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1714 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1715 if (pcbddc->ChangeOfBasisMatrix) { 1716 pcbddc->work_change = r; 1717 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1718 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1719 } 1720 PetscFunctionReturn(0); 1721 } 1722 1723 #undef __FUNCT__ 1724 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 1725 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1726 { 1727 PCBDDCBenignMatMult_ctx ctx; 1728 PetscErrorCode ierr; 1729 PetscBool apply_right,apply_left,reset_x; 1730 1731 PetscFunctionBegin; 1732 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1733 if (transpose) { 1734 apply_right = ctx->apply_left; 1735 apply_left = ctx->apply_right; 1736 } else { 1737 apply_right = ctx->apply_right; 1738 apply_left = ctx->apply_left; 1739 } 1740 reset_x = PETSC_FALSE; 1741 if (apply_right) { 1742 const PetscScalar *ax; 1743 PetscInt nl,i; 1744 1745 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1746 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1747 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1748 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1749 for (i=0;i<ctx->benign_n;i++) { 1750 PetscScalar sum,val; 1751 const PetscInt *idxs; 1752 PetscInt nz,j; 1753 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1754 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1755 sum = 0.; 1756 if (ctx->apply_p0) { 1757 val = ctx->work[idxs[nz-1]]; 1758 for (j=0;j<nz-1;j++) { 1759 sum += ctx->work[idxs[j]]; 1760 ctx->work[idxs[j]] += val; 1761 } 1762 } else { 1763 for (j=0;j<nz-1;j++) { 1764 sum += ctx->work[idxs[j]]; 1765 } 1766 } 1767 ctx->work[idxs[nz-1]] -= sum; 1768 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1769 } 1770 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1771 reset_x = PETSC_TRUE; 1772 } 1773 if (transpose) { 1774 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1775 } else { 1776 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1777 } 1778 if (reset_x) { 1779 ierr = VecResetArray(x);CHKERRQ(ierr); 1780 } 1781 if (apply_left) { 1782 PetscScalar *ay; 1783 PetscInt i; 1784 1785 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1786 for (i=0;i<ctx->benign_n;i++) { 1787 PetscScalar sum,val; 1788 const PetscInt *idxs; 1789 PetscInt nz,j; 1790 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1791 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1792 val = -ay[idxs[nz-1]]; 1793 if (ctx->apply_p0) { 1794 sum = 0.; 1795 for (j=0;j<nz-1;j++) { 1796 sum += ay[idxs[j]]; 1797 ay[idxs[j]] += val; 1798 } 1799 ay[idxs[nz-1]] += sum; 1800 } else { 1801 for (j=0;j<nz-1;j++) { 1802 ay[idxs[j]] += val; 1803 } 1804 ay[idxs[nz-1]] = 0.; 1805 } 1806 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1807 } 1808 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1809 } 1810 PetscFunctionReturn(0); 1811 } 1812 1813 #undef __FUNCT__ 1814 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 1815 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1816 { 1817 PetscErrorCode ierr; 1818 1819 PetscFunctionBegin; 1820 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1821 PetscFunctionReturn(0); 1822 } 1823 1824 #undef __FUNCT__ 1825 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 1826 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1827 { 1828 PetscErrorCode ierr; 1829 1830 PetscFunctionBegin; 1831 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1832 PetscFunctionReturn(0); 1833 } 1834 1835 #undef __FUNCT__ 1836 #define __FUNCT__ "PCBDDCBenignShellMat" 1837 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1838 { 1839 PC_IS *pcis = (PC_IS*)pc->data; 1840 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1841 PCBDDCBenignMatMult_ctx ctx; 1842 PetscErrorCode ierr; 1843 1844 PetscFunctionBegin; 1845 if (!restore) { 1846 Mat A_IB,A_BI; 1847 PetscScalar *work; 1848 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1849 1850 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1851 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1852 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1853 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1854 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1855 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1856 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1857 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1858 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1859 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1860 ctx->apply_left = PETSC_TRUE; 1861 ctx->apply_right = PETSC_FALSE; 1862 ctx->apply_p0 = PETSC_FALSE; 1863 ctx->benign_n = pcbddc->benign_n; 1864 if (reuse) { 1865 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1866 ctx->free = PETSC_FALSE; 1867 } else { /* TODO: could be optimized for successive solves */ 1868 ISLocalToGlobalMapping N_to_D; 1869 PetscInt i; 1870 1871 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1872 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1873 for (i=0;i<pcbddc->benign_n;i++) { 1874 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1875 } 1876 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1877 ctx->free = PETSC_TRUE; 1878 } 1879 ctx->A = pcis->A_IB; 1880 ctx->work = work; 1881 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1882 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1883 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1884 pcis->A_IB = A_IB; 1885 1886 /* A_BI as A_IB^T */ 1887 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1888 pcbddc->benign_original_mat = pcis->A_BI; 1889 pcis->A_BI = A_BI; 1890 } else { 1891 if (!pcbddc->benign_original_mat) { 1892 PetscFunctionReturn(0); 1893 } 1894 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1895 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1896 pcis->A_IB = ctx->A; 1897 ctx->A = NULL; 1898 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1899 pcis->A_BI = pcbddc->benign_original_mat; 1900 pcbddc->benign_original_mat = NULL; 1901 if (ctx->free) { 1902 PetscInt i; 1903 for (i=0;i<ctx->benign_n;i++) { 1904 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1905 } 1906 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1907 } 1908 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1909 ierr = PetscFree(ctx);CHKERRQ(ierr); 1910 } 1911 PetscFunctionReturn(0); 1912 } 1913 1914 /* used just in bddc debug mode */ 1915 #undef __FUNCT__ 1916 #define __FUNCT__ "PCBDDCBenignProject" 1917 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1918 { 1919 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1920 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1921 Mat An; 1922 PetscErrorCode ierr; 1923 1924 PetscFunctionBegin; 1925 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1926 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1927 if (is1) { 1928 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1929 ierr = MatDestroy(&An);CHKERRQ(ierr); 1930 } else { 1931 *B = An; 1932 } 1933 PetscFunctionReturn(0); 1934 } 1935 1936 /* TODO: add reuse flag */ 1937 #undef __FUNCT__ 1938 #define __FUNCT__ "MatSeqAIJCompress" 1939 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1940 { 1941 Mat Bt; 1942 PetscScalar *a,*bdata; 1943 const PetscInt *ii,*ij; 1944 PetscInt m,n,i,nnz,*bii,*bij; 1945 PetscBool flg_row; 1946 PetscErrorCode ierr; 1947 1948 PetscFunctionBegin; 1949 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 1950 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1951 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 1952 nnz = n; 1953 for (i=0;i<ii[n];i++) { 1954 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 1955 } 1956 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 1957 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 1958 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 1959 nnz = 0; 1960 bii[0] = 0; 1961 for (i=0;i<n;i++) { 1962 PetscInt j; 1963 for (j=ii[i];j<ii[i+1];j++) { 1964 PetscScalar entry = a[j]; 1965 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 1966 bij[nnz] = ij[j]; 1967 bdata[nnz] = entry; 1968 nnz++; 1969 } 1970 } 1971 bii[i+1] = nnz; 1972 } 1973 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 1974 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 1975 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1976 { 1977 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 1978 b->free_a = PETSC_TRUE; 1979 b->free_ij = PETSC_TRUE; 1980 } 1981 *B = Bt; 1982 PetscFunctionReturn(0); 1983 } 1984 1985 #undef __FUNCT__ 1986 #define __FUNCT__ "MatDetectDisconnectedComponents" 1987 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 1988 { 1989 Mat B; 1990 IS is_dummy,*cc_n; 1991 ISLocalToGlobalMapping l2gmap_dummy; 1992 PCBDDCGraph graph; 1993 PetscInt i,n; 1994 PetscInt *xadj,*adjncy; 1995 PetscInt *xadj_filtered,*adjncy_filtered; 1996 PetscBool flg_row,isseqaij; 1997 PetscErrorCode ierr; 1998 1999 PetscFunctionBegin; 2000 if (!A->rmap->N || !A->cmap->N) { 2001 *ncc = 0; 2002 *cc = NULL; 2003 PetscFunctionReturn(0); 2004 } 2005 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2006 if (!isseqaij && filter) { 2007 PetscBool isseqdense; 2008 2009 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2010 if (!isseqdense) { 2011 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2012 } else { /* TODO: rectangular case and LDA */ 2013 PetscScalar *array; 2014 PetscReal chop=1.e-6; 2015 2016 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2017 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2018 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2019 for (i=0;i<n;i++) { 2020 PetscInt j; 2021 for (j=i+1;j<n;j++) { 2022 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2023 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2024 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2025 } 2026 } 2027 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2028 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2029 } 2030 } else { 2031 B = A; 2032 } 2033 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2034 2035 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2036 if (filter) { 2037 PetscScalar *data; 2038 PetscInt j,cum; 2039 2040 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2041 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2042 cum = 0; 2043 for (i=0;i<n;i++) { 2044 PetscInt t; 2045 2046 for (j=xadj[i];j<xadj[i+1];j++) { 2047 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2048 continue; 2049 } 2050 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2051 } 2052 t = xadj_filtered[i]; 2053 xadj_filtered[i] = cum; 2054 cum += t; 2055 } 2056 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2057 } else { 2058 xadj_filtered = NULL; 2059 adjncy_filtered = NULL; 2060 } 2061 2062 /* compute local connected components using PCBDDCGraph */ 2063 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2064 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2065 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2066 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2067 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2068 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2069 if (xadj_filtered) { 2070 graph->xadj = xadj_filtered; 2071 graph->adjncy = adjncy_filtered; 2072 } else { 2073 graph->xadj = xadj; 2074 graph->adjncy = adjncy; 2075 } 2076 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2077 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2078 /* partial clean up */ 2079 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2080 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2081 if (A != B) { 2082 ierr = MatDestroy(&B);CHKERRQ(ierr); 2083 } 2084 2085 /* get back data */ 2086 if (ncc) *ncc = graph->ncc; 2087 if (cc) { 2088 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2089 for (i=0;i<graph->ncc;i++) { 2090 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2091 } 2092 *cc = cc_n; 2093 } 2094 /* clean up graph */ 2095 graph->xadj = 0; 2096 graph->adjncy = 0; 2097 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2098 PetscFunctionReturn(0); 2099 } 2100 2101 #undef __FUNCT__ 2102 #define __FUNCT__ "PCBDDCBenignCheck" 2103 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2104 { 2105 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2106 PC_IS* pcis = (PC_IS*)(pc->data); 2107 IS dirIS = NULL; 2108 PetscInt i; 2109 PetscErrorCode ierr; 2110 2111 PetscFunctionBegin; 2112 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2113 if (zerodiag) { 2114 Mat A; 2115 Vec vec3_N; 2116 PetscScalar *vals; 2117 const PetscInt *idxs; 2118 PetscInt nz,*count; 2119 2120 /* p0 */ 2121 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2122 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2123 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2124 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2125 for (i=0;i<nz;i++) vals[i] = 1.; 2126 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2127 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2128 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2129 /* v_I */ 2130 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2131 for (i=0;i<nz;i++) vals[i] = 0.; 2132 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2133 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2134 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2135 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2136 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2137 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2138 if (dirIS) { 2139 PetscInt n; 2140 2141 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2142 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2143 for (i=0;i<n;i++) vals[i] = 0.; 2144 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2145 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2146 } 2147 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2148 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2149 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2150 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2151 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2152 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2153 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2154 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2155 ierr = PetscFree(vals);CHKERRQ(ierr); 2156 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2157 2158 /* there should not be any pressure dofs lying on the interface */ 2159 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2160 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2161 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2162 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2163 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2164 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2165 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2166 ierr = PetscFree(count);CHKERRQ(ierr); 2167 } 2168 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2169 2170 /* check PCBDDCBenignGetOrSetP0 */ 2171 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2172 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2173 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2174 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2175 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2176 for (i=0;i<pcbddc->benign_n;i++) { 2177 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2178 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 2179 } 2180 PetscFunctionReturn(0); 2181 } 2182 2183 #undef __FUNCT__ 2184 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 2185 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2186 { 2187 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2188 IS pressures,zerodiag,*zerodiag_subs; 2189 PetscInt nz,n; 2190 PetscInt *interior_dofs,n_interior_dofs; 2191 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 2192 PetscErrorCode ierr; 2193 2194 PetscFunctionBegin; 2195 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2196 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2197 for (n=0;n<pcbddc->benign_n;n++) { 2198 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2199 } 2200 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2201 pcbddc->benign_n = 0; 2202 /* if a local info on dofs is present, assumes that the last field represents "pressures" 2203 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2204 Checks if all the pressure dofs in each subdomain have a zero diagonal 2205 If not, a change of basis on pressures is not needed 2206 since the local Schur complements are already SPD 2207 */ 2208 has_null_pressures = PETSC_TRUE; 2209 have_null = PETSC_TRUE; 2210 if (pcbddc->n_ISForDofsLocal) { 2211 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2212 2213 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2214 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2215 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2216 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2217 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2218 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2219 if (!sorted) { 2220 ierr = ISSort(pressures);CHKERRQ(ierr); 2221 } 2222 } else { 2223 pressures = NULL; 2224 } 2225 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2226 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2227 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2228 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2229 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2230 if (!sorted) { 2231 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2232 } 2233 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2234 if (!nz) { 2235 if (n) have_null = PETSC_FALSE; 2236 has_null_pressures = PETSC_FALSE; 2237 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2238 } 2239 recompute_zerodiag = PETSC_FALSE; 2240 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2241 zerodiag_subs = NULL; 2242 pcbddc->benign_n = 0; 2243 n_interior_dofs = 0; 2244 interior_dofs = NULL; 2245 if (pcbddc->current_level) { /* need to compute interior nodes */ 2246 PetscInt n,i,j; 2247 PetscInt n_neigh,*neigh,*n_shared,**shared; 2248 PetscInt *iwork; 2249 2250 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2251 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2252 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2253 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2254 for (i=1;i<n_neigh;i++) 2255 for (j=0;j<n_shared[i];j++) 2256 iwork[shared[i][j]] += 1; 2257 for (i=0;i<n;i++) 2258 if (!iwork[i]) 2259 interior_dofs[n_interior_dofs++] = i; 2260 ierr = PetscFree(iwork);CHKERRQ(ierr); 2261 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2262 } 2263 if (has_null_pressures) { 2264 IS *subs; 2265 PetscInt nsubs,i,j,nl; 2266 const PetscInt *idxs; 2267 PetscScalar *array; 2268 Vec *work; 2269 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2270 2271 subs = pcbddc->local_subs; 2272 nsubs = pcbddc->n_local_subs; 2273 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2274 if (pcbddc->current_level) { 2275 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2276 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2277 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2278 /* work[0] = 1_p */ 2279 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2280 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2281 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2282 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2283 /* work[0] = 1_v */ 2284 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2285 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2286 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2287 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2288 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2289 } 2290 if (nsubs > 1) { 2291 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2292 for (i=0;i<nsubs;i++) { 2293 ISLocalToGlobalMapping l2g; 2294 IS t_zerodiag_subs; 2295 PetscInt nl; 2296 2297 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2298 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2299 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2300 if (nl) { 2301 PetscBool valid = PETSC_TRUE; 2302 2303 if (pcbddc->current_level) { 2304 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2305 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2306 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2307 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2308 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2309 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2310 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2311 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2312 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2313 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2314 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2315 for (j=0;j<n_interior_dofs;j++) { 2316 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2317 valid = PETSC_FALSE; 2318 break; 2319 } 2320 } 2321 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2322 } 2323 if (valid && pcbddc->NeumannBoundariesLocal) { 2324 IS t_bc; 2325 PetscInt nzb; 2326 2327 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 2328 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 2329 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 2330 if (nzb) valid = PETSC_FALSE; 2331 } 2332 if (valid && pressures) { 2333 IS t_pressure_subs; 2334 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2335 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2336 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2337 } 2338 if (valid) { 2339 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2340 pcbddc->benign_n++; 2341 } else { 2342 recompute_zerodiag = PETSC_TRUE; 2343 } 2344 } 2345 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2346 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2347 } 2348 } else { /* there's just one subdomain (or zero if they have not been detected */ 2349 PetscBool valid = PETSC_TRUE; 2350 2351 if (pcbddc->NeumannBoundariesLocal) { 2352 PetscInt nzb; 2353 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 2354 if (nzb) valid = PETSC_FALSE; 2355 } 2356 if (valid && pressures) { 2357 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2358 } 2359 if (valid && pcbddc->current_level) { 2360 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2361 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2362 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2363 for (j=0;j<n_interior_dofs;j++) { 2364 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2365 valid = PETSC_FALSE; 2366 break; 2367 } 2368 } 2369 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2370 } 2371 if (valid) { 2372 pcbddc->benign_n = 1; 2373 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2374 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2375 zerodiag_subs[0] = zerodiag; 2376 } 2377 } 2378 if (pcbddc->current_level) { 2379 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2380 } 2381 } 2382 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2383 2384 if (!pcbddc->benign_n) { 2385 PetscInt n; 2386 2387 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2388 recompute_zerodiag = PETSC_FALSE; 2389 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2390 if (n) { 2391 has_null_pressures = PETSC_FALSE; 2392 have_null = PETSC_FALSE; 2393 } 2394 } 2395 2396 /* final check for null pressures */ 2397 if (zerodiag && pressures) { 2398 PetscInt nz,np; 2399 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2400 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2401 if (nz != np) have_null = PETSC_FALSE; 2402 } 2403 2404 if (recompute_zerodiag) { 2405 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2406 if (pcbddc->benign_n == 1) { 2407 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2408 zerodiag = zerodiag_subs[0]; 2409 } else { 2410 PetscInt i,nzn,*new_idxs; 2411 2412 nzn = 0; 2413 for (i=0;i<pcbddc->benign_n;i++) { 2414 PetscInt ns; 2415 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2416 nzn += ns; 2417 } 2418 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2419 nzn = 0; 2420 for (i=0;i<pcbddc->benign_n;i++) { 2421 PetscInt ns,*idxs; 2422 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2423 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2424 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2425 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2426 nzn += ns; 2427 } 2428 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2429 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2430 } 2431 have_null = PETSC_FALSE; 2432 } 2433 2434 /* Prepare matrix to compute no-net-flux */ 2435 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2436 Mat A,loc_divudotp; 2437 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2438 IS row,col,isused = NULL; 2439 PetscInt M,N,n,st,n_isused; 2440 2441 if (pressures) { 2442 isused = pressures; 2443 } else { 2444 isused = zerodiag; 2445 } 2446 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2447 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2448 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2449 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2450 n_isused = 0; 2451 if (isused) { 2452 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2453 } 2454 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2455 st = st-n_isused; 2456 if (n) { 2457 const PetscInt *gidxs; 2458 2459 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2460 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2461 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2462 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2463 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2464 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2465 } else { 2466 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2467 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2468 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2469 } 2470 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2471 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2472 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2473 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2474 ierr = ISDestroy(&row);CHKERRQ(ierr); 2475 ierr = ISDestroy(&col);CHKERRQ(ierr); 2476 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2477 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2478 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2479 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2480 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2481 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2482 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2483 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2484 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2485 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2486 } 2487 2488 /* change of basis and p0 dofs */ 2489 if (has_null_pressures) { 2490 IS zerodiagc; 2491 const PetscInt *idxs,*idxsc; 2492 PetscInt i,s,*nnz; 2493 2494 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2495 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2496 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2497 /* local change of basis for pressures */ 2498 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2499 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2500 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2501 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2502 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2503 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2504 for (i=0;i<pcbddc->benign_n;i++) { 2505 PetscInt nzs,j; 2506 2507 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2508 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2509 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2510 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2511 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2512 } 2513 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2514 ierr = PetscFree(nnz);CHKERRQ(ierr); 2515 /* set identity on velocities */ 2516 for (i=0;i<n-nz;i++) { 2517 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2518 } 2519 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2520 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2521 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2522 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2523 /* set change on pressures */ 2524 for (s=0;s<pcbddc->benign_n;s++) { 2525 PetscScalar *array; 2526 PetscInt nzs; 2527 2528 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2529 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2530 for (i=0;i<nzs-1;i++) { 2531 PetscScalar vals[2]; 2532 PetscInt cols[2]; 2533 2534 cols[0] = idxs[i]; 2535 cols[1] = idxs[nzs-1]; 2536 vals[0] = 1.; 2537 vals[1] = 1.; 2538 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2539 } 2540 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2541 for (i=0;i<nzs-1;i++) array[i] = -1.; 2542 array[nzs-1] = 1.; 2543 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2544 /* store local idxs for p0 */ 2545 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2546 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2547 ierr = PetscFree(array);CHKERRQ(ierr); 2548 } 2549 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2550 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2551 /* project if needed */ 2552 if (pcbddc->benign_change_explicit) { 2553 Mat M; 2554 2555 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2556 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2557 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2558 ierr = MatDestroy(&M);CHKERRQ(ierr); 2559 } 2560 /* store global idxs for p0 */ 2561 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2562 } 2563 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2564 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2565 2566 /* determines if the coarse solver will be singular or not */ 2567 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2568 /* determines if the problem has subdomains with 0 pressure block */ 2569 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2570 *zerodiaglocal = zerodiag; 2571 PetscFunctionReturn(0); 2572 } 2573 2574 #undef __FUNCT__ 2575 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 2576 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2577 { 2578 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2579 PetscScalar *array; 2580 PetscErrorCode ierr; 2581 2582 PetscFunctionBegin; 2583 if (!pcbddc->benign_sf) { 2584 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2585 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2586 } 2587 if (get) { 2588 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2589 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2590 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2591 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2592 } else { 2593 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2594 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2595 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2596 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2597 } 2598 PetscFunctionReturn(0); 2599 } 2600 2601 #undef __FUNCT__ 2602 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 2603 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2604 { 2605 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2606 PetscErrorCode ierr; 2607 2608 PetscFunctionBegin; 2609 /* TODO: add error checking 2610 - avoid nested pop (or push) calls. 2611 - cannot push before pop. 2612 - cannot call this if pcbddc->local_mat is NULL 2613 */ 2614 if (!pcbddc->benign_n) { 2615 PetscFunctionReturn(0); 2616 } 2617 if (pop) { 2618 if (pcbddc->benign_change_explicit) { 2619 IS is_p0; 2620 MatReuse reuse; 2621 2622 /* extract B_0 */ 2623 reuse = MAT_INITIAL_MATRIX; 2624 if (pcbddc->benign_B0) { 2625 reuse = MAT_REUSE_MATRIX; 2626 } 2627 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2628 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2629 /* remove rows and cols from local problem */ 2630 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2631 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2632 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2633 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2634 } else { 2635 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2636 PetscScalar *vals; 2637 PetscInt i,n,*idxs_ins; 2638 2639 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2640 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2641 if (!pcbddc->benign_B0) { 2642 PetscInt *nnz; 2643 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2644 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2645 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2646 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2647 for (i=0;i<pcbddc->benign_n;i++) { 2648 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2649 nnz[i] = n - nnz[i]; 2650 } 2651 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2652 ierr = PetscFree(nnz);CHKERRQ(ierr); 2653 } 2654 2655 for (i=0;i<pcbddc->benign_n;i++) { 2656 PetscScalar *array; 2657 PetscInt *idxs,j,nz,cum; 2658 2659 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2660 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2661 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2662 for (j=0;j<nz;j++) vals[j] = 1.; 2663 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2664 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2665 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2666 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2667 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2668 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2669 cum = 0; 2670 for (j=0;j<n;j++) { 2671 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2672 vals[cum] = array[j]; 2673 idxs_ins[cum] = j; 2674 cum++; 2675 } 2676 } 2677 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2678 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2679 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2680 } 2681 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2682 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2683 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2684 } 2685 } else { /* push */ 2686 if (pcbddc->benign_change_explicit) { 2687 PetscInt i; 2688 2689 for (i=0;i<pcbddc->benign_n;i++) { 2690 PetscScalar *B0_vals; 2691 PetscInt *B0_cols,B0_ncol; 2692 2693 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2694 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2695 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2696 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2697 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2698 } 2699 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2700 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2701 } else { 2702 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2703 } 2704 } 2705 PetscFunctionReturn(0); 2706 } 2707 2708 #undef __FUNCT__ 2709 #define __FUNCT__ "PCBDDCAdaptiveSelection" 2710 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2711 { 2712 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2713 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2714 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2715 PetscBLASInt *B_iwork,*B_ifail; 2716 PetscScalar *work,lwork; 2717 PetscScalar *St,*S,*eigv; 2718 PetscScalar *Sarray,*Starray; 2719 PetscReal *eigs,thresh; 2720 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2721 PetscBool allocated_S_St; 2722 #if defined(PETSC_USE_COMPLEX) 2723 PetscReal *rwork; 2724 #endif 2725 PetscErrorCode ierr; 2726 2727 PetscFunctionBegin; 2728 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2729 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2730 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2731 2732 if (pcbddc->dbg_flag) { 2733 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2734 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2735 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2736 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2737 } 2738 2739 if (pcbddc->dbg_flag) { 2740 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2741 } 2742 2743 /* max size of subsets */ 2744 mss = 0; 2745 for (i=0;i<sub_schurs->n_subs;i++) { 2746 PetscInt subset_size; 2747 2748 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2749 mss = PetscMax(mss,subset_size); 2750 } 2751 2752 /* min/max and threshold */ 2753 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2754 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2755 nmax = PetscMax(nmin,nmax); 2756 allocated_S_St = PETSC_FALSE; 2757 if (nmin) { 2758 allocated_S_St = PETSC_TRUE; 2759 } 2760 2761 /* allocate lapack workspace */ 2762 cum = cum2 = 0; 2763 maxneigs = 0; 2764 for (i=0;i<sub_schurs->n_subs;i++) { 2765 PetscInt n,subset_size; 2766 2767 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2768 n = PetscMin(subset_size,nmax); 2769 cum += subset_size; 2770 cum2 += subset_size*n; 2771 maxneigs = PetscMax(maxneigs,n); 2772 } 2773 if (mss) { 2774 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2775 PetscBLASInt B_itype = 1; 2776 PetscBLASInt B_N = mss; 2777 PetscReal zero = 0.0; 2778 PetscReal eps = 0.0; /* dlamch? */ 2779 2780 B_lwork = -1; 2781 S = NULL; 2782 St = NULL; 2783 eigs = NULL; 2784 eigv = NULL; 2785 B_iwork = NULL; 2786 B_ifail = NULL; 2787 #if defined(PETSC_USE_COMPLEX) 2788 rwork = NULL; 2789 #endif 2790 thresh = 1.0; 2791 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2792 #if defined(PETSC_USE_COMPLEX) 2793 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2794 #else 2795 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2796 #endif 2797 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2798 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2799 } else { 2800 /* TODO */ 2801 } 2802 } else { 2803 lwork = 0; 2804 } 2805 2806 nv = 0; 2807 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2808 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2809 } 2810 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2811 if (allocated_S_St) { 2812 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2813 } 2814 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2815 #if defined(PETSC_USE_COMPLEX) 2816 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2817 #endif 2818 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2819 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2820 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2821 nv+cum,&pcbddc->adaptive_constraints_idxs, 2822 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2823 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2824 2825 maxneigs = 0; 2826 cum = cumarray = 0; 2827 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2828 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2829 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2830 const PetscInt *idxs; 2831 2832 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2833 for (cum=0;cum<nv;cum++) { 2834 pcbddc->adaptive_constraints_n[cum] = 1; 2835 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2836 pcbddc->adaptive_constraints_data[cum] = 1.0; 2837 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2838 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2839 } 2840 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2841 } 2842 2843 if (mss) { /* multilevel */ 2844 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2845 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2846 } 2847 2848 thresh = pcbddc->adaptive_threshold; 2849 for (i=0;i<sub_schurs->n_subs;i++) { 2850 const PetscInt *idxs; 2851 PetscReal upper,lower; 2852 PetscInt j,subset_size,eigs_start = 0; 2853 PetscBLASInt B_N; 2854 PetscBool same_data = PETSC_FALSE; 2855 2856 if (pcbddc->use_deluxe_scaling) { 2857 upper = PETSC_MAX_REAL; 2858 lower = thresh; 2859 } else { 2860 upper = 1./thresh; 2861 lower = 0.; 2862 } 2863 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2864 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2865 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2866 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2867 if (sub_schurs->is_hermitian) { 2868 PetscInt j,k; 2869 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2870 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2871 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2872 } 2873 for (j=0;j<subset_size;j++) { 2874 for (k=j;k<subset_size;k++) { 2875 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2876 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2877 } 2878 } 2879 } else { 2880 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2881 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2882 } 2883 } else { 2884 S = Sarray + cumarray; 2885 St = Starray + cumarray; 2886 } 2887 /* see if we can save some work */ 2888 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2889 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2890 } 2891 2892 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2893 B_neigs = 0; 2894 } else { 2895 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2896 PetscBLASInt B_itype = 1; 2897 PetscBLASInt B_IL, B_IU; 2898 PetscReal eps = -1.0; /* dlamch? */ 2899 PetscInt nmin_s; 2900 PetscBool compute_range = PETSC_FALSE; 2901 2902 if (pcbddc->dbg_flag) { 2903 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]]); 2904 } 2905 2906 compute_range = PETSC_FALSE; 2907 if (thresh > 1.+PETSC_SMALL && !same_data) { 2908 compute_range = PETSC_TRUE; 2909 } 2910 2911 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2912 if (compute_range) { 2913 2914 /* ask for eigenvalues larger than thresh */ 2915 #if defined(PETSC_USE_COMPLEX) 2916 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2917 #else 2918 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2919 #endif 2920 } else if (!same_data) { 2921 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2922 B_IL = 1; 2923 #if defined(PETSC_USE_COMPLEX) 2924 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2925 #else 2926 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2927 #endif 2928 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 2929 PetscInt k; 2930 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2931 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2932 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2933 nmin = nmax; 2934 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2935 for (k=0;k<nmax;k++) { 2936 eigs[k] = 1./PETSC_SMALL; 2937 eigv[k*(subset_size+1)] = 1.0; 2938 } 2939 } 2940 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2941 if (B_ierr) { 2942 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2943 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2944 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2945 } 2946 2947 if (B_neigs > nmax) { 2948 if (pcbddc->dbg_flag) { 2949 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 2950 } 2951 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 2952 B_neigs = nmax; 2953 } 2954 2955 nmin_s = PetscMin(nmin,B_N); 2956 if (B_neigs < nmin_s) { 2957 PetscBLASInt B_neigs2; 2958 2959 if (pcbddc->use_deluxe_scaling) { 2960 B_IL = B_N - nmin_s + 1; 2961 B_IU = B_N - B_neigs; 2962 } else { 2963 B_IL = B_neigs + 1; 2964 B_IU = nmin_s; 2965 } 2966 if (pcbddc->dbg_flag) { 2967 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 2968 } 2969 if (sub_schurs->is_hermitian) { 2970 PetscInt j,k; 2971 for (j=0;j<subset_size;j++) { 2972 for (k=j;k<subset_size;k++) { 2973 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2974 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2975 } 2976 } 2977 } else { 2978 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2979 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2980 } 2981 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2982 #if defined(PETSC_USE_COMPLEX) 2983 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2984 #else 2985 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2986 #endif 2987 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2988 B_neigs += B_neigs2; 2989 } 2990 if (B_ierr) { 2991 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2992 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2993 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2994 } 2995 if (pcbddc->dbg_flag) { 2996 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 2997 for (j=0;j<B_neigs;j++) { 2998 if (eigs[j] == 0.0) { 2999 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3000 } else { 3001 if (pcbddc->use_deluxe_scaling) { 3002 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3003 } else { 3004 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3005 } 3006 } 3007 } 3008 } 3009 } else { 3010 /* TODO */ 3011 } 3012 } 3013 /* change the basis back to the original one */ 3014 if (sub_schurs->change) { 3015 Mat change,phi,phit; 3016 3017 if (pcbddc->dbg_flag > 1) { 3018 PetscInt ii; 3019 for (ii=0;ii<B_neigs;ii++) { 3020 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3021 for (j=0;j<B_N;j++) { 3022 #if defined(PETSC_USE_COMPLEX) 3023 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3024 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3025 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3026 #else 3027 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3028 #endif 3029 } 3030 } 3031 } 3032 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3033 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3034 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3035 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3036 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3037 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3038 } 3039 maxneigs = PetscMax(B_neigs,maxneigs); 3040 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3041 if (B_neigs) { 3042 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3043 3044 if (pcbddc->dbg_flag > 1) { 3045 PetscInt ii; 3046 for (ii=0;ii<B_neigs;ii++) { 3047 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3048 for (j=0;j<B_N;j++) { 3049 #if defined(PETSC_USE_COMPLEX) 3050 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3051 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3052 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3053 #else 3054 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3055 #endif 3056 } 3057 } 3058 } 3059 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3060 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3061 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3062 cum++; 3063 } 3064 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3065 /* shift for next computation */ 3066 cumarray += subset_size*subset_size; 3067 } 3068 if (pcbddc->dbg_flag) { 3069 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3070 } 3071 3072 if (mss) { 3073 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3074 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3075 /* destroy matrices (junk) */ 3076 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3077 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3078 } 3079 if (allocated_S_St) { 3080 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3081 } 3082 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3083 #if defined(PETSC_USE_COMPLEX) 3084 ierr = PetscFree(rwork);CHKERRQ(ierr); 3085 #endif 3086 if (pcbddc->dbg_flag) { 3087 PetscInt maxneigs_r; 3088 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3089 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3090 } 3091 PetscFunctionReturn(0); 3092 } 3093 3094 #undef __FUNCT__ 3095 #define __FUNCT__ "PCBDDCSetUpSolvers" 3096 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3097 { 3098 PetscScalar *coarse_submat_vals; 3099 PetscErrorCode ierr; 3100 3101 PetscFunctionBegin; 3102 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3103 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3104 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3105 3106 /* Setup local neumann solver ksp_R */ 3107 /* PCBDDCSetUpLocalScatters should be called first! */ 3108 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3109 3110 /* 3111 Setup local correction and local part of coarse basis. 3112 Gives back the dense local part of the coarse matrix in column major ordering 3113 */ 3114 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3115 3116 /* Compute total number of coarse nodes and setup coarse solver */ 3117 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3118 3119 /* free */ 3120 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3121 PetscFunctionReturn(0); 3122 } 3123 3124 #undef __FUNCT__ 3125 #define __FUNCT__ "PCBDDCResetCustomization" 3126 PetscErrorCode PCBDDCResetCustomization(PC pc) 3127 { 3128 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3129 PetscErrorCode ierr; 3130 3131 PetscFunctionBegin; 3132 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3133 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3134 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3135 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3136 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3137 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3138 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3139 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3140 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3141 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3142 PetscFunctionReturn(0); 3143 } 3144 3145 #undef __FUNCT__ 3146 #define __FUNCT__ "PCBDDCResetTopography" 3147 PetscErrorCode PCBDDCResetTopography(PC pc) 3148 { 3149 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3150 PetscInt i; 3151 PetscErrorCode ierr; 3152 3153 PetscFunctionBegin; 3154 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3155 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3156 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3157 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3158 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3159 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3160 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3161 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3162 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3163 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3164 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3165 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3166 for (i=0;i<pcbddc->n_local_subs;i++) { 3167 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3168 } 3169 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3170 if (pcbddc->sub_schurs) { 3171 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 3172 } 3173 pcbddc->graphanalyzed = PETSC_FALSE; 3174 pcbddc->recompute_topography = PETSC_TRUE; 3175 PetscFunctionReturn(0); 3176 } 3177 3178 #undef __FUNCT__ 3179 #define __FUNCT__ "PCBDDCResetSolvers" 3180 PetscErrorCode PCBDDCResetSolvers(PC pc) 3181 { 3182 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3183 PetscErrorCode ierr; 3184 3185 PetscFunctionBegin; 3186 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3187 if (pcbddc->coarse_phi_B) { 3188 PetscScalar *array; 3189 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3190 ierr = PetscFree(array);CHKERRQ(ierr); 3191 } 3192 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3193 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3194 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3195 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3196 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3197 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3198 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3199 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3200 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3201 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3202 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3203 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3204 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3205 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3206 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 3207 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 3208 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 3209 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3210 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3211 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3212 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3213 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3214 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3215 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3216 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3217 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3218 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3219 if (pcbddc->benign_zerodiag_subs) { 3220 PetscInt i; 3221 for (i=0;i<pcbddc->benign_n;i++) { 3222 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3223 } 3224 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3225 } 3226 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3227 PetscFunctionReturn(0); 3228 } 3229 3230 #undef __FUNCT__ 3231 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 3232 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3233 { 3234 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3235 PC_IS *pcis = (PC_IS*)pc->data; 3236 VecType impVecType; 3237 PetscInt n_constraints,n_R,old_size; 3238 PetscErrorCode ierr; 3239 3240 PetscFunctionBegin; 3241 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3242 n_R = pcis->n - pcbddc->n_vertices; 3243 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3244 /* local work vectors (try to avoid unneeded work)*/ 3245 /* R nodes */ 3246 old_size = -1; 3247 if (pcbddc->vec1_R) { 3248 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3249 } 3250 if (n_R != old_size) { 3251 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3252 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3253 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3254 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3255 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3256 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3257 } 3258 /* local primal dofs */ 3259 old_size = -1; 3260 if (pcbddc->vec1_P) { 3261 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3262 } 3263 if (pcbddc->local_primal_size != old_size) { 3264 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3265 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3266 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3267 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3268 } 3269 /* local explicit constraints */ 3270 old_size = -1; 3271 if (pcbddc->vec1_C) { 3272 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3273 } 3274 if (n_constraints && n_constraints != old_size) { 3275 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3276 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3277 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3278 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3279 } 3280 PetscFunctionReturn(0); 3281 } 3282 3283 #undef __FUNCT__ 3284 #define __FUNCT__ "PCBDDCSetUpCorrection" 3285 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3286 { 3287 PetscErrorCode ierr; 3288 /* pointers to pcis and pcbddc */ 3289 PC_IS* pcis = (PC_IS*)pc->data; 3290 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3291 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3292 /* submatrices of local problem */ 3293 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3294 /* submatrices of local coarse problem */ 3295 Mat S_VV,S_CV,S_VC,S_CC; 3296 /* working matrices */ 3297 Mat C_CR; 3298 /* additional working stuff */ 3299 PC pc_R; 3300 Mat F; 3301 Vec dummy_vec; 3302 PetscBool isLU,isCHOL,isILU,need_benign_correction; 3303 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3304 PetscScalar *work; 3305 PetscInt *idx_V_B; 3306 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3307 PetscInt i,n_R,n_D,n_B; 3308 3309 /* some shortcuts to scalars */ 3310 PetscScalar one=1.0,m_one=-1.0; 3311 3312 PetscFunctionBegin; 3313 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3314 3315 /* Set Non-overlapping dimensions */ 3316 n_vertices = pcbddc->n_vertices; 3317 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3318 n_B = pcis->n_B; 3319 n_D = pcis->n - n_B; 3320 n_R = pcis->n - n_vertices; 3321 3322 /* vertices in boundary numbering */ 3323 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3324 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3325 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3326 3327 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3328 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3329 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3330 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3331 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3332 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3333 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3334 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3335 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3336 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3337 3338 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3339 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3340 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3341 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3342 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3343 lda_rhs = n_R; 3344 need_benign_correction = PETSC_FALSE; 3345 if (isLU || isILU || isCHOL) { 3346 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3347 } else if (sub_schurs && sub_schurs->reuse_solver) { 3348 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3349 MatFactorType type; 3350 3351 F = reuse_solver->F; 3352 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3353 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3354 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3355 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3356 } else { 3357 F = NULL; 3358 } 3359 3360 /* allocate workspace */ 3361 n = 0; 3362 if (n_constraints) { 3363 n += lda_rhs*n_constraints; 3364 } 3365 if (n_vertices) { 3366 n = PetscMax(2*lda_rhs*n_vertices,n); 3367 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3368 } 3369 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3370 3371 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3372 dummy_vec = NULL; 3373 if (need_benign_correction && lda_rhs != n_R && F) { 3374 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3375 } 3376 3377 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3378 if (n_constraints) { 3379 Mat M1,M2,M3,C_B; 3380 IS is_aux; 3381 PetscScalar *array,*array2; 3382 3383 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3384 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3385 3386 /* Extract constraints on R nodes: C_{CR} */ 3387 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3388 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3389 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3390 3391 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3392 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3393 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3394 for (i=0;i<n_constraints;i++) { 3395 const PetscScalar *row_cmat_values; 3396 const PetscInt *row_cmat_indices; 3397 PetscInt size_of_constraint,j; 3398 3399 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3400 for (j=0;j<size_of_constraint;j++) { 3401 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3402 } 3403 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3404 } 3405 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3406 if (F) { 3407 Mat B; 3408 3409 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3410 if (need_benign_correction) { 3411 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3412 3413 /* rhs is already zero on interior dofs, no need to change the rhs */ 3414 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3415 } 3416 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 3417 if (need_benign_correction) { 3418 PetscScalar *marr; 3419 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3420 3421 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3422 if (lda_rhs != n_R) { 3423 for (i=0;i<n_constraints;i++) { 3424 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3425 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3426 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3427 } 3428 } else { 3429 for (i=0;i<n_constraints;i++) { 3430 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3431 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3432 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3433 } 3434 } 3435 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3436 } 3437 ierr = MatDestroy(&B);CHKERRQ(ierr); 3438 } else { 3439 PetscScalar *marr; 3440 3441 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3442 for (i=0;i<n_constraints;i++) { 3443 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3444 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3445 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3446 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3447 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3448 } 3449 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3450 } 3451 if (!pcbddc->switch_static) { 3452 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3453 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3454 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3455 for (i=0;i<n_constraints;i++) { 3456 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3457 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3458 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3459 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3460 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3461 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3462 } 3463 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3464 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3465 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3466 } else { 3467 if (lda_rhs != n_R) { 3468 IS dummy; 3469 3470 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3471 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3472 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3473 } else { 3474 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3475 pcbddc->local_auxmat2 = local_auxmat2_R; 3476 } 3477 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3478 } 3479 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3480 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3481 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3482 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3483 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3484 if (isCHOL) { 3485 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3486 } else { 3487 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3488 } 3489 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3490 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3491 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3492 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3493 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3494 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3495 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3496 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3497 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3498 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3499 } 3500 3501 /* Get submatrices from subdomain matrix */ 3502 if (n_vertices) { 3503 IS is_aux; 3504 3505 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3506 IS tis; 3507 3508 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3509 ierr = ISSort(tis);CHKERRQ(ierr); 3510 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3511 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3512 } else { 3513 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3514 } 3515 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3516 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3517 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3518 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3519 } 3520 3521 /* Matrix of coarse basis functions (local) */ 3522 if (pcbddc->coarse_phi_B) { 3523 PetscInt on_B,on_primal,on_D=n_D; 3524 if (pcbddc->coarse_phi_D) { 3525 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3526 } 3527 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3528 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3529 PetscScalar *marray; 3530 3531 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3532 ierr = PetscFree(marray);CHKERRQ(ierr); 3533 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3534 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3535 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3536 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3537 } 3538 } 3539 3540 if (!pcbddc->coarse_phi_B) { 3541 PetscScalar *marray; 3542 3543 n = n_B*pcbddc->local_primal_size; 3544 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3545 n += n_D*pcbddc->local_primal_size; 3546 } 3547 if (!pcbddc->symmetric_primal) { 3548 n *= 2; 3549 } 3550 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 3551 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3552 n = n_B*pcbddc->local_primal_size; 3553 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3554 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3555 n += n_D*pcbddc->local_primal_size; 3556 } 3557 if (!pcbddc->symmetric_primal) { 3558 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3559 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3560 n = n_B*pcbddc->local_primal_size; 3561 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3562 } 3563 } else { 3564 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3565 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3566 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3567 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3568 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3569 } 3570 } 3571 } 3572 3573 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3574 p0_lidx_I = NULL; 3575 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3576 const PetscInt *idxs; 3577 3578 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3579 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3580 for (i=0;i<pcbddc->benign_n;i++) { 3581 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3582 } 3583 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3584 } 3585 3586 /* vertices */ 3587 if (n_vertices) { 3588 3589 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3590 3591 if (n_R) { 3592 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3593 PetscBLASInt B_N,B_one = 1; 3594 PetscScalar *x,*y; 3595 PetscBool isseqaij; 3596 3597 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3598 if (need_benign_correction) { 3599 ISLocalToGlobalMapping RtoN; 3600 IS is_p0; 3601 PetscInt *idxs_p0,n; 3602 3603 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3604 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3605 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3606 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3607 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3608 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3609 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3610 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3611 } 3612 3613 if (lda_rhs == n_R) { 3614 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3615 } else { 3616 PetscScalar *av,*array; 3617 const PetscInt *xadj,*adjncy; 3618 PetscInt n; 3619 PetscBool flg_row; 3620 3621 array = work+lda_rhs*n_vertices; 3622 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3623 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3624 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3625 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3626 for (i=0;i<n;i++) { 3627 PetscInt j; 3628 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3629 } 3630 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3631 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3632 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3633 } 3634 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3635 if (need_benign_correction) { 3636 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3637 PetscScalar *marr; 3638 3639 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3640 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3641 3642 | 0 0 0 | (V) 3643 L = | 0 0 -1 | (P-p0) 3644 | 0 0 -1 | (p0) 3645 3646 */ 3647 for (i=0;i<reuse_solver->benign_n;i++) { 3648 const PetscScalar *vals; 3649 const PetscInt *idxs,*idxs_zero; 3650 PetscInt n,j,nz; 3651 3652 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3653 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3654 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3655 for (j=0;j<n;j++) { 3656 PetscScalar val = vals[j]; 3657 PetscInt k,col = idxs[j]; 3658 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3659 } 3660 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3661 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3662 } 3663 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3664 } 3665 if (F) { 3666 /* need to correct the rhs */ 3667 if (need_benign_correction) { 3668 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3669 PetscScalar *marr; 3670 3671 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3672 if (lda_rhs != n_R) { 3673 for (i=0;i<n_vertices;i++) { 3674 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3675 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3676 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3677 } 3678 } else { 3679 for (i=0;i<n_vertices;i++) { 3680 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3681 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3682 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3683 } 3684 } 3685 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3686 } 3687 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3688 /* need to correct the solution */ 3689 if (need_benign_correction) { 3690 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3691 PetscScalar *marr; 3692 3693 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3694 if (lda_rhs != n_R) { 3695 for (i=0;i<n_vertices;i++) { 3696 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3697 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3698 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3699 } 3700 } else { 3701 for (i=0;i<n_vertices;i++) { 3702 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3703 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3704 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3705 } 3706 } 3707 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3708 } 3709 } else { 3710 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3711 for (i=0;i<n_vertices;i++) { 3712 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3713 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3714 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3715 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3716 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3717 } 3718 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3719 } 3720 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3721 /* S_VV and S_CV */ 3722 if (n_constraints) { 3723 Mat B; 3724 3725 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3726 for (i=0;i<n_vertices;i++) { 3727 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3728 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3729 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3730 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3731 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3732 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3733 } 3734 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3735 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3736 ierr = MatDestroy(&B);CHKERRQ(ierr); 3737 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3738 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3739 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3740 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3741 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3742 ierr = MatDestroy(&B);CHKERRQ(ierr); 3743 } 3744 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3745 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3746 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3747 } 3748 if (lda_rhs != n_R) { 3749 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3750 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3751 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3752 } 3753 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3754 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3755 if (need_benign_correction) { 3756 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3757 PetscScalar *marr,*sums; 3758 3759 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3760 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 3761 for (i=0;i<reuse_solver->benign_n;i++) { 3762 const PetscScalar *vals; 3763 const PetscInt *idxs,*idxs_zero; 3764 PetscInt n,j,nz; 3765 3766 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3767 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3768 for (j=0;j<n_vertices;j++) { 3769 PetscInt k; 3770 sums[j] = 0.; 3771 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3772 } 3773 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3774 for (j=0;j<n;j++) { 3775 PetscScalar val = vals[j]; 3776 PetscInt k; 3777 for (k=0;k<n_vertices;k++) { 3778 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3779 } 3780 } 3781 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3782 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3783 } 3784 ierr = PetscFree(sums);CHKERRQ(ierr); 3785 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 3786 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3787 } 3788 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3789 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3790 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3791 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3792 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3793 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3794 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3795 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3796 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3797 } else { 3798 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3799 } 3800 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3801 3802 /* coarse basis functions */ 3803 for (i=0;i<n_vertices;i++) { 3804 PetscScalar *y; 3805 3806 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3807 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3808 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3809 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3810 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3811 y[n_B*i+idx_V_B[i]] = 1.0; 3812 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3813 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3814 3815 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3816 PetscInt j; 3817 3818 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3819 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3820 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3821 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3822 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3823 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3824 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3825 } 3826 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3827 } 3828 /* if n_R == 0 the object is not destroyed */ 3829 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3830 } 3831 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3832 3833 if (n_constraints) { 3834 Mat B; 3835 3836 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3837 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3838 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3839 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3840 if (n_vertices) { 3841 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3842 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3843 } else { 3844 Mat S_VCt; 3845 3846 if (lda_rhs != n_R) { 3847 ierr = MatDestroy(&B);CHKERRQ(ierr); 3848 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3849 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3850 } 3851 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3852 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3853 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3854 } 3855 } 3856 ierr = MatDestroy(&B);CHKERRQ(ierr); 3857 /* coarse basis functions */ 3858 for (i=0;i<n_constraints;i++) { 3859 PetscScalar *y; 3860 3861 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3862 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3863 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3864 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3865 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3866 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3867 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3868 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3869 PetscInt j; 3870 3871 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3872 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3873 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3874 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3875 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3876 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3877 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3878 } 3879 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3880 } 3881 } 3882 if (n_constraints) { 3883 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3884 } 3885 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3886 3887 /* coarse matrix entries relative to B_0 */ 3888 if (pcbddc->benign_n) { 3889 Mat B0_B,B0_BPHI; 3890 IS is_dummy; 3891 PetscScalar *data; 3892 PetscInt j; 3893 3894 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3895 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3896 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3897 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3898 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3899 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3900 for (j=0;j<pcbddc->benign_n;j++) { 3901 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3902 for (i=0;i<pcbddc->local_primal_size;i++) { 3903 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3904 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3905 } 3906 } 3907 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3908 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3909 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3910 } 3911 3912 /* compute other basis functions for non-symmetric problems */ 3913 if (!pcbddc->symmetric_primal) { 3914 Mat B_V=NULL,B_C=NULL; 3915 PetscScalar *marray; 3916 3917 if (n_constraints) { 3918 Mat S_CCT,C_CRT; 3919 3920 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 3921 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 3922 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 3923 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 3924 if (n_vertices) { 3925 Mat S_VCT; 3926 3927 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 3928 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 3929 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 3930 } 3931 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 3932 } else { 3933 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 3934 } 3935 if (n_vertices && n_R) { 3936 PetscScalar *av,*marray; 3937 const PetscInt *xadj,*adjncy; 3938 PetscInt n; 3939 PetscBool flg_row; 3940 3941 /* B_V = B_V - A_VR^T */ 3942 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3943 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3944 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 3945 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3946 for (i=0;i<n;i++) { 3947 PetscInt j; 3948 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 3949 } 3950 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3951 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3952 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3953 } 3954 3955 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 3956 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3957 for (i=0;i<n_vertices;i++) { 3958 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 3959 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3960 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3961 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3962 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3963 } 3964 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3965 if (B_C) { 3966 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 3967 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 3968 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 3969 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3970 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3971 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3972 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3973 } 3974 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 3975 } 3976 /* coarse basis functions */ 3977 for (i=0;i<pcbddc->local_primal_size;i++) { 3978 PetscScalar *y; 3979 3980 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 3981 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3982 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3983 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3984 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3985 if (i<n_vertices) { 3986 y[n_B*i+idx_V_B[i]] = 1.0; 3987 } 3988 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3989 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3990 3991 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3992 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3993 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3994 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3995 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3996 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3997 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3998 } 3999 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4000 } 4001 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4002 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4003 } 4004 /* free memory */ 4005 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4006 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4007 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4008 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4009 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4010 ierr = PetscFree(work);CHKERRQ(ierr); 4011 if (n_vertices) { 4012 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4013 } 4014 if (n_constraints) { 4015 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4016 } 4017 /* Checking coarse_sub_mat and coarse basis functios */ 4018 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4019 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4020 if (pcbddc->dbg_flag) { 4021 Mat coarse_sub_mat; 4022 Mat AUXMAT,TM1,TM2,TM3,TM4; 4023 Mat coarse_phi_D,coarse_phi_B; 4024 Mat coarse_psi_D,coarse_psi_B; 4025 Mat A_II,A_BB,A_IB,A_BI; 4026 Mat C_B,CPHI; 4027 IS is_dummy; 4028 Vec mones; 4029 MatType checkmattype=MATSEQAIJ; 4030 PetscReal real_value; 4031 4032 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4033 Mat A; 4034 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4035 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4036 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4037 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4038 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4039 ierr = MatDestroy(&A);CHKERRQ(ierr); 4040 } else { 4041 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4042 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4043 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4044 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4045 } 4046 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4047 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4048 if (!pcbddc->symmetric_primal) { 4049 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4050 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4051 } 4052 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4053 4054 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4055 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4056 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4057 if (!pcbddc->symmetric_primal) { 4058 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4059 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4060 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4061 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4062 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4063 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4064 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4065 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4066 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4067 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4068 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4069 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4070 } else { 4071 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4072 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4073 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4074 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4075 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4076 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4077 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4078 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4079 } 4080 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4081 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4082 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4083 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4084 if (pcbddc->benign_n) { 4085 Mat B0_B,B0_BPHI; 4086 PetscScalar *data,*data2; 4087 PetscInt j; 4088 4089 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4090 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4091 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4092 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4093 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4094 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4095 for (j=0;j<pcbddc->benign_n;j++) { 4096 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4097 for (i=0;i<pcbddc->local_primal_size;i++) { 4098 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4099 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4100 } 4101 } 4102 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4103 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4104 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4105 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4106 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4107 } 4108 #if 0 4109 { 4110 PetscViewer viewer; 4111 char filename[256]; 4112 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4113 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4114 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4115 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4116 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4117 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4118 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4119 if (save_change) { 4120 Mat phi_B; 4121 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 4122 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 4123 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 4124 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 4125 } else { 4126 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4127 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4128 } 4129 if (pcbddc->coarse_phi_D) { 4130 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4131 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4132 } 4133 if (pcbddc->coarse_psi_B) { 4134 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4135 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4136 } 4137 if (pcbddc->coarse_psi_D) { 4138 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4139 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4140 } 4141 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4142 } 4143 #endif 4144 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4145 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4146 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4147 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4148 4149 /* check constraints */ 4150 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4151 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4152 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4153 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4154 } else { 4155 PetscScalar *data; 4156 Mat tmat; 4157 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4158 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4159 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4160 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4161 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4162 } 4163 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4164 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4165 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4166 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4167 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4168 if (!pcbddc->symmetric_primal) { 4169 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4170 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4171 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4172 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4173 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4174 } 4175 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4176 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4177 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4178 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4179 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4180 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4181 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4182 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4183 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4184 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4185 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4186 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4187 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4188 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4189 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4190 if (!pcbddc->symmetric_primal) { 4191 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4192 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4193 } 4194 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4195 } 4196 /* get back data */ 4197 *coarse_submat_vals_n = coarse_submat_vals; 4198 PetscFunctionReturn(0); 4199 } 4200 4201 #undef __FUNCT__ 4202 #define __FUNCT__ "MatGetSubMatrixUnsorted" 4203 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4204 { 4205 Mat *work_mat; 4206 IS isrow_s,iscol_s; 4207 PetscBool rsorted,csorted; 4208 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4209 PetscErrorCode ierr; 4210 4211 PetscFunctionBegin; 4212 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4213 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4214 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4215 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4216 4217 if (!rsorted) { 4218 const PetscInt *idxs; 4219 PetscInt *idxs_sorted,i; 4220 4221 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4222 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4223 for (i=0;i<rsize;i++) { 4224 idxs_perm_r[i] = i; 4225 } 4226 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4227 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4228 for (i=0;i<rsize;i++) { 4229 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4230 } 4231 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4232 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4233 } else { 4234 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4235 isrow_s = isrow; 4236 } 4237 4238 if (!csorted) { 4239 if (isrow == iscol) { 4240 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4241 iscol_s = isrow_s; 4242 } else { 4243 const PetscInt *idxs; 4244 PetscInt *idxs_sorted,i; 4245 4246 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4247 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4248 for (i=0;i<csize;i++) { 4249 idxs_perm_c[i] = i; 4250 } 4251 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4252 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4253 for (i=0;i<csize;i++) { 4254 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4255 } 4256 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4257 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4258 } 4259 } else { 4260 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4261 iscol_s = iscol; 4262 } 4263 4264 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4265 4266 if (!rsorted || !csorted) { 4267 Mat new_mat; 4268 IS is_perm_r,is_perm_c; 4269 4270 if (!rsorted) { 4271 PetscInt *idxs_r,i; 4272 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4273 for (i=0;i<rsize;i++) { 4274 idxs_r[idxs_perm_r[i]] = i; 4275 } 4276 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4277 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4278 } else { 4279 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4280 } 4281 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4282 4283 if (!csorted) { 4284 if (isrow_s == iscol_s) { 4285 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4286 is_perm_c = is_perm_r; 4287 } else { 4288 PetscInt *idxs_c,i; 4289 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4290 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4291 for (i=0;i<csize;i++) { 4292 idxs_c[idxs_perm_c[i]] = i; 4293 } 4294 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4295 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4296 } 4297 } else { 4298 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4299 } 4300 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4301 4302 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4303 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4304 work_mat[0] = new_mat; 4305 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4306 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4307 } 4308 4309 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4310 *B = work_mat[0]; 4311 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4312 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4313 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4314 PetscFunctionReturn(0); 4315 } 4316 4317 #undef __FUNCT__ 4318 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 4319 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4320 { 4321 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4322 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4323 Mat new_mat; 4324 IS is_local,is_global; 4325 PetscInt local_size; 4326 PetscBool isseqaij; 4327 PetscErrorCode ierr; 4328 4329 PetscFunctionBegin; 4330 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4331 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4332 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4333 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4334 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4335 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4336 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4337 4338 /* check */ 4339 if (pcbddc->dbg_flag) { 4340 Vec x,x_change; 4341 PetscReal error; 4342 4343 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4344 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4345 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4346 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4347 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4348 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4349 if (!pcbddc->change_interior) { 4350 const PetscScalar *x,*y,*v; 4351 PetscReal lerror = 0.; 4352 PetscInt i; 4353 4354 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4355 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4356 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4357 for (i=0;i<local_size;i++) 4358 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4359 lerror = PetscAbsScalar(x[i]-y[i]); 4360 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4361 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4362 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4363 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4364 if (error > PETSC_SMALL) { 4365 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4366 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4367 } else { 4368 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4369 } 4370 } 4371 } 4372 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4373 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4374 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4375 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4376 if (error > PETSC_SMALL) { 4377 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4378 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4379 } else { 4380 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4381 } 4382 } 4383 ierr = VecDestroy(&x);CHKERRQ(ierr); 4384 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4385 } 4386 4387 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4388 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4389 if (isseqaij) { 4390 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4391 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4392 } else { 4393 Mat work_mat; 4394 4395 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4396 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4397 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4398 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4399 } 4400 if (matis->A->symmetric_set) { 4401 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4402 #if !defined(PETSC_USE_COMPLEX) 4403 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4404 #endif 4405 } 4406 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4407 PetscFunctionReturn(0); 4408 } 4409 4410 #undef __FUNCT__ 4411 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 4412 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4413 { 4414 PC_IS* pcis = (PC_IS*)(pc->data); 4415 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4416 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4417 PetscInt *idx_R_local=NULL; 4418 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4419 PetscInt vbs,bs; 4420 PetscBT bitmask=NULL; 4421 PetscErrorCode ierr; 4422 4423 PetscFunctionBegin; 4424 /* 4425 No need to setup local scatters if 4426 - primal space is unchanged 4427 AND 4428 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4429 AND 4430 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4431 */ 4432 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4433 PetscFunctionReturn(0); 4434 } 4435 /* destroy old objects */ 4436 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4437 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4438 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4439 /* Set Non-overlapping dimensions */ 4440 n_B = pcis->n_B; 4441 n_D = pcis->n - n_B; 4442 n_vertices = pcbddc->n_vertices; 4443 4444 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4445 4446 /* create auxiliary bitmask and allocate workspace */ 4447 if (!sub_schurs || !sub_schurs->reuse_solver) { 4448 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4449 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4450 for (i=0;i<n_vertices;i++) { 4451 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4452 } 4453 4454 for (i=0, n_R=0; i<pcis->n; i++) { 4455 if (!PetscBTLookup(bitmask,i)) { 4456 idx_R_local[n_R++] = i; 4457 } 4458 } 4459 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4460 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4461 4462 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4463 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4464 } 4465 4466 /* Block code */ 4467 vbs = 1; 4468 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4469 if (bs>1 && !(n_vertices%bs)) { 4470 PetscBool is_blocked = PETSC_TRUE; 4471 PetscInt *vary; 4472 if (!sub_schurs || !sub_schurs->reuse_solver) { 4473 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4474 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4475 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4476 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4477 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4478 for (i=0; i<pcis->n/bs; i++) { 4479 if (vary[i]!=0 && vary[i]!=bs) { 4480 is_blocked = PETSC_FALSE; 4481 break; 4482 } 4483 } 4484 ierr = PetscFree(vary);CHKERRQ(ierr); 4485 } else { 4486 /* Verify directly the R set */ 4487 for (i=0; i<n_R/bs; i++) { 4488 PetscInt j,node=idx_R_local[bs*i]; 4489 for (j=1; j<bs; j++) { 4490 if (node != idx_R_local[bs*i+j]-j) { 4491 is_blocked = PETSC_FALSE; 4492 break; 4493 } 4494 } 4495 } 4496 } 4497 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4498 vbs = bs; 4499 for (i=0;i<n_R/vbs;i++) { 4500 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4501 } 4502 } 4503 } 4504 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4505 if (sub_schurs && sub_schurs->reuse_solver) { 4506 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4507 4508 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4509 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4510 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4511 reuse_solver->is_R = pcbddc->is_R_local; 4512 } else { 4513 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4514 } 4515 4516 /* print some info if requested */ 4517 if (pcbddc->dbg_flag) { 4518 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4519 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4520 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4521 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4522 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4523 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4524 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4525 } 4526 4527 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4528 if (!sub_schurs || !sub_schurs->reuse_solver) { 4529 IS is_aux1,is_aux2; 4530 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4531 4532 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4533 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4534 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4535 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4536 for (i=0; i<n_D; i++) { 4537 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4538 } 4539 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4540 for (i=0, j=0; i<n_R; i++) { 4541 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4542 aux_array1[j++] = i; 4543 } 4544 } 4545 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4546 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4547 for (i=0, j=0; i<n_B; i++) { 4548 if (!PetscBTLookup(bitmask,is_indices[i])) { 4549 aux_array2[j++] = i; 4550 } 4551 } 4552 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4553 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4554 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4555 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4556 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4557 4558 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4559 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4560 for (i=0, j=0; i<n_R; i++) { 4561 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4562 aux_array1[j++] = i; 4563 } 4564 } 4565 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4566 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4567 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4568 } 4569 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4570 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4571 } else { 4572 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4573 IS tis; 4574 PetscInt schur_size; 4575 4576 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4577 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4578 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4579 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4580 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4581 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4582 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4583 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4584 } 4585 } 4586 PetscFunctionReturn(0); 4587 } 4588 4589 4590 #undef __FUNCT__ 4591 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 4592 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4593 { 4594 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4595 PC_IS *pcis = (PC_IS*)pc->data; 4596 PC pc_temp; 4597 Mat A_RR; 4598 MatReuse reuse; 4599 PetscScalar m_one = -1.0; 4600 PetscReal value; 4601 PetscInt n_D,n_R; 4602 PetscBool check_corr[2],issbaij; 4603 PetscErrorCode ierr; 4604 /* prefixes stuff */ 4605 char dir_prefix[256],neu_prefix[256],str_level[16]; 4606 size_t len; 4607 4608 PetscFunctionBegin; 4609 4610 /* compute prefixes */ 4611 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4612 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4613 if (!pcbddc->current_level) { 4614 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4615 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4616 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4617 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4618 } else { 4619 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4620 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4621 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4622 len -= 15; /* remove "pc_bddc_coarse_" */ 4623 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4624 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4625 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4626 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4627 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4628 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4629 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4630 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4631 } 4632 4633 /* DIRICHLET PROBLEM */ 4634 if (dirichlet) { 4635 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4636 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4637 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4638 if (pcbddc->dbg_flag) { 4639 Mat A_IIn; 4640 4641 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4642 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4643 pcis->A_II = A_IIn; 4644 } 4645 } 4646 if (pcbddc->local_mat->symmetric_set) { 4647 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4648 } 4649 /* Matrix for Dirichlet problem is pcis->A_II */ 4650 n_D = pcis->n - pcis->n_B; 4651 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4652 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4653 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4654 /* default */ 4655 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4656 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4657 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4658 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4659 if (issbaij) { 4660 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4661 } else { 4662 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4663 } 4664 /* Allow user's customization */ 4665 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4666 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4667 } 4668 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4669 if (sub_schurs && sub_schurs->reuse_solver) { 4670 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4671 4672 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4673 } 4674 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4675 if (!n_D) { 4676 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4677 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4678 } 4679 /* Set Up KSP for Dirichlet problem of BDDC */ 4680 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4681 /* set ksp_D into pcis data */ 4682 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4683 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4684 pcis->ksp_D = pcbddc->ksp_D; 4685 } 4686 4687 /* NEUMANN PROBLEM */ 4688 A_RR = 0; 4689 if (neumann) { 4690 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4691 PetscInt ibs,mbs; 4692 PetscBool issbaij; 4693 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4694 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4695 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4696 if (pcbddc->ksp_R) { /* already created ksp */ 4697 PetscInt nn_R; 4698 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4699 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4700 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4701 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4702 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4703 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4704 reuse = MAT_INITIAL_MATRIX; 4705 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4706 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4707 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4708 reuse = MAT_INITIAL_MATRIX; 4709 } else { /* safe to reuse the matrix */ 4710 reuse = MAT_REUSE_MATRIX; 4711 } 4712 } 4713 /* last check */ 4714 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4715 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4716 reuse = MAT_INITIAL_MATRIX; 4717 } 4718 } else { /* first time, so we need to create the matrix */ 4719 reuse = MAT_INITIAL_MATRIX; 4720 } 4721 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4722 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4723 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4724 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4725 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4726 if (matis->A == pcbddc->local_mat) { 4727 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4728 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4729 } else { 4730 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4731 } 4732 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4733 if (matis->A == pcbddc->local_mat) { 4734 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4735 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4736 } else { 4737 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4738 } 4739 } 4740 /* extract A_RR */ 4741 if (sub_schurs && sub_schurs->reuse_solver) { 4742 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4743 4744 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4745 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4746 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4747 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4748 } else { 4749 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4750 } 4751 } else { 4752 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4753 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4754 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4755 } 4756 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4757 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4758 } 4759 if (pcbddc->local_mat->symmetric_set) { 4760 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4761 } 4762 if (!pcbddc->ksp_R) { /* create object if not present */ 4763 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4764 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4765 /* default */ 4766 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4767 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4768 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4769 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4770 if (issbaij) { 4771 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4772 } else { 4773 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4774 } 4775 /* Allow user's customization */ 4776 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4777 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4778 } 4779 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4780 if (!n_R) { 4781 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4782 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4783 } 4784 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4785 /* Reuse solver if it is present */ 4786 if (sub_schurs && sub_schurs->reuse_solver) { 4787 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4788 4789 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4790 } 4791 /* Set Up KSP for Neumann problem of BDDC */ 4792 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4793 } 4794 4795 if (pcbddc->dbg_flag) { 4796 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4797 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4798 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4799 } 4800 4801 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4802 check_corr[0] = check_corr[1] = PETSC_FALSE; 4803 if (pcbddc->NullSpace_corr[0]) { 4804 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4805 } 4806 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4807 check_corr[0] = PETSC_TRUE; 4808 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4809 } 4810 if (neumann && pcbddc->NullSpace_corr[2]) { 4811 check_corr[1] = PETSC_TRUE; 4812 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4813 } 4814 4815 /* check Dirichlet and Neumann solvers */ 4816 if (pcbddc->dbg_flag) { 4817 if (dirichlet) { /* Dirichlet */ 4818 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4819 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4820 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4821 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4822 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4823 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4824 if (check_corr[0]) { 4825 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4826 } 4827 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4828 } 4829 if (neumann) { /* Neumann */ 4830 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4831 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4832 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4833 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4834 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4835 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4836 if (check_corr[1]) { 4837 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4838 } 4839 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4840 } 4841 } 4842 /* free Neumann problem's matrix */ 4843 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4844 PetscFunctionReturn(0); 4845 } 4846 4847 #undef __FUNCT__ 4848 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 4849 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4850 { 4851 PetscErrorCode ierr; 4852 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4853 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4854 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4855 4856 PetscFunctionBegin; 4857 if (!reuse_solver) { 4858 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4859 } 4860 if (!pcbddc->switch_static) { 4861 if (applytranspose && pcbddc->local_auxmat1) { 4862 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4863 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4864 } 4865 if (!reuse_solver) { 4866 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4867 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4868 } else { 4869 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4870 4871 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4872 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4873 } 4874 } else { 4875 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4876 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4877 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4878 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4879 if (applytranspose && pcbddc->local_auxmat1) { 4880 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4881 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4882 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4883 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4884 } 4885 } 4886 if (!reuse_solver || pcbddc->switch_static) { 4887 if (applytranspose) { 4888 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4889 } else { 4890 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4891 } 4892 } else { 4893 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4894 4895 if (applytranspose) { 4896 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4897 } else { 4898 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4899 } 4900 } 4901 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4902 if (!pcbddc->switch_static) { 4903 if (!reuse_solver) { 4904 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4905 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4906 } else { 4907 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4908 4909 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4910 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4911 } 4912 if (!applytranspose && pcbddc->local_auxmat1) { 4913 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4914 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4915 } 4916 } else { 4917 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4918 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4919 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4920 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4921 if (!applytranspose && pcbddc->local_auxmat1) { 4922 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4923 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4924 } 4925 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4926 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4927 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4928 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4929 } 4930 PetscFunctionReturn(0); 4931 } 4932 4933 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 4934 #undef __FUNCT__ 4935 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 4936 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 4937 { 4938 PetscErrorCode ierr; 4939 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4940 PC_IS* pcis = (PC_IS*) (pc->data); 4941 const PetscScalar zero = 0.0; 4942 4943 PetscFunctionBegin; 4944 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 4945 if (!pcbddc->benign_apply_coarse_only) { 4946 if (applytranspose) { 4947 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4948 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4949 } else { 4950 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4951 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4952 } 4953 } else { 4954 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 4955 } 4956 4957 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 4958 if (pcbddc->benign_n) { 4959 PetscScalar *array; 4960 PetscInt j; 4961 4962 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4963 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 4964 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4965 } 4966 4967 /* start communications from local primal nodes to rhs of coarse solver */ 4968 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 4969 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4970 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4971 4972 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 4973 if (pcbddc->coarse_ksp) { 4974 Mat coarse_mat; 4975 Vec rhs,sol; 4976 MatNullSpace nullsp; 4977 PetscBool isbddc = PETSC_FALSE; 4978 4979 if (pcbddc->benign_have_null) { 4980 PC coarse_pc; 4981 4982 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4983 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4984 /* we need to propagate to coarser levels the need for a possible benign correction */ 4985 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 4986 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4987 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 4988 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 4989 } 4990 } 4991 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 4992 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 4993 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4994 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 4995 if (nullsp) { 4996 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 4997 } 4998 if (applytranspose) { 4999 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5000 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5001 } else { 5002 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5003 PC coarse_pc; 5004 5005 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5006 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5007 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5008 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5009 } else { 5010 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5011 } 5012 } 5013 /* we don't need the benign correction at coarser levels anymore */ 5014 if (pcbddc->benign_have_null && isbddc) { 5015 PC coarse_pc; 5016 PC_BDDC* coarsepcbddc; 5017 5018 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5019 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5020 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5021 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5022 } 5023 if (nullsp) { 5024 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5025 } 5026 } 5027 5028 /* Local solution on R nodes */ 5029 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5030 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5031 } 5032 /* communications from coarse sol to local primal nodes */ 5033 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5034 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5035 5036 /* Sum contributions from the two levels */ 5037 if (!pcbddc->benign_apply_coarse_only) { 5038 if (applytranspose) { 5039 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5040 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5041 } else { 5042 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5043 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5044 } 5045 /* store p0 */ 5046 if (pcbddc->benign_n) { 5047 PetscScalar *array; 5048 PetscInt j; 5049 5050 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5051 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5052 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5053 } 5054 } else { /* expand the coarse solution */ 5055 if (applytranspose) { 5056 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5057 } else { 5058 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5059 } 5060 } 5061 PetscFunctionReturn(0); 5062 } 5063 5064 #undef __FUNCT__ 5065 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 5066 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5067 { 5068 PetscErrorCode ierr; 5069 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5070 PetscScalar *array; 5071 Vec from,to; 5072 5073 PetscFunctionBegin; 5074 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5075 from = pcbddc->coarse_vec; 5076 to = pcbddc->vec1_P; 5077 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5078 Vec tvec; 5079 5080 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5081 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5082 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5083 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5084 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5085 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5086 } 5087 } else { /* from local to global -> put data in coarse right hand side */ 5088 from = pcbddc->vec1_P; 5089 to = pcbddc->coarse_vec; 5090 } 5091 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5092 PetscFunctionReturn(0); 5093 } 5094 5095 #undef __FUNCT__ 5096 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 5097 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5098 { 5099 PetscErrorCode ierr; 5100 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5101 PetscScalar *array; 5102 Vec from,to; 5103 5104 PetscFunctionBegin; 5105 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5106 from = pcbddc->coarse_vec; 5107 to = pcbddc->vec1_P; 5108 } else { /* from local to global -> put data in coarse right hand side */ 5109 from = pcbddc->vec1_P; 5110 to = pcbddc->coarse_vec; 5111 } 5112 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5113 if (smode == SCATTER_FORWARD) { 5114 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5115 Vec tvec; 5116 5117 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5118 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5119 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5120 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5121 } 5122 } else { 5123 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5124 ierr = VecResetArray(from);CHKERRQ(ierr); 5125 } 5126 } 5127 PetscFunctionReturn(0); 5128 } 5129 5130 /* uncomment for testing purposes */ 5131 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5132 #undef __FUNCT__ 5133 #define __FUNCT__ "PCBDDCConstraintsSetUp" 5134 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5135 { 5136 PetscErrorCode ierr; 5137 PC_IS* pcis = (PC_IS*)(pc->data); 5138 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5139 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5140 /* one and zero */ 5141 PetscScalar one=1.0,zero=0.0; 5142 /* space to store constraints and their local indices */ 5143 PetscScalar *constraints_data; 5144 PetscInt *constraints_idxs,*constraints_idxs_B; 5145 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5146 PetscInt *constraints_n; 5147 /* iterators */ 5148 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5149 /* BLAS integers */ 5150 PetscBLASInt lwork,lierr; 5151 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5152 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5153 /* reuse */ 5154 PetscInt olocal_primal_size,olocal_primal_size_cc; 5155 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5156 /* change of basis */ 5157 PetscBool qr_needed; 5158 PetscBT change_basis,qr_needed_idx; 5159 /* auxiliary stuff */ 5160 PetscInt *nnz,*is_indices; 5161 PetscInt ncc; 5162 /* some quantities */ 5163 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5164 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5165 5166 PetscFunctionBegin; 5167 /* Destroy Mat objects computed previously */ 5168 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5169 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5170 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5171 /* save info on constraints from previous setup (if any) */ 5172 olocal_primal_size = pcbddc->local_primal_size; 5173 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5174 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5175 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5176 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5177 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5178 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5179 5180 if (!pcbddc->adaptive_selection) { 5181 IS ISForVertices,*ISForFaces,*ISForEdges; 5182 MatNullSpace nearnullsp; 5183 const Vec *nearnullvecs; 5184 Vec *localnearnullsp; 5185 PetscScalar *array; 5186 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5187 PetscBool nnsp_has_cnst; 5188 /* LAPACK working arrays for SVD or POD */ 5189 PetscBool skip_lapack,boolforchange; 5190 PetscScalar *work; 5191 PetscReal *singular_vals; 5192 #if defined(PETSC_USE_COMPLEX) 5193 PetscReal *rwork; 5194 #endif 5195 #if defined(PETSC_MISSING_LAPACK_GESVD) 5196 PetscScalar *temp_basis,*correlation_mat; 5197 #else 5198 PetscBLASInt dummy_int=1; 5199 PetscScalar dummy_scalar=1.; 5200 #endif 5201 5202 /* Get index sets for faces, edges and vertices from graph */ 5203 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5204 /* print some info */ 5205 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5206 PetscInt nv; 5207 5208 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5209 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5210 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5211 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5212 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5213 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5214 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5215 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5216 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5217 } 5218 5219 /* free unneeded index sets */ 5220 if (!pcbddc->use_vertices) { 5221 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5222 } 5223 if (!pcbddc->use_edges) { 5224 for (i=0;i<n_ISForEdges;i++) { 5225 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5226 } 5227 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5228 n_ISForEdges = 0; 5229 } 5230 if (!pcbddc->use_faces) { 5231 for (i=0;i<n_ISForFaces;i++) { 5232 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5233 } 5234 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5235 n_ISForFaces = 0; 5236 } 5237 5238 /* check if near null space is attached to global mat */ 5239 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5240 if (nearnullsp) { 5241 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5242 /* remove any stored info */ 5243 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5244 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5245 /* store information for BDDC solver reuse */ 5246 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5247 pcbddc->onearnullspace = nearnullsp; 5248 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5249 for (i=0;i<nnsp_size;i++) { 5250 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5251 } 5252 } else { /* if near null space is not provided BDDC uses constants by default */ 5253 nnsp_size = 0; 5254 nnsp_has_cnst = PETSC_TRUE; 5255 } 5256 /* get max number of constraints on a single cc */ 5257 max_constraints = nnsp_size; 5258 if (nnsp_has_cnst) max_constraints++; 5259 5260 /* 5261 Evaluate maximum storage size needed by the procedure 5262 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5263 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5264 There can be multiple constraints per connected component 5265 */ 5266 n_vertices = 0; 5267 if (ISForVertices) { 5268 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5269 } 5270 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5271 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5272 5273 total_counts = n_ISForFaces+n_ISForEdges; 5274 total_counts *= max_constraints; 5275 total_counts += n_vertices; 5276 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5277 5278 total_counts = 0; 5279 max_size_of_constraint = 0; 5280 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5281 IS used_is; 5282 if (i<n_ISForEdges) { 5283 used_is = ISForEdges[i]; 5284 } else { 5285 used_is = ISForFaces[i-n_ISForEdges]; 5286 } 5287 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5288 total_counts += j; 5289 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5290 } 5291 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5292 5293 /* get local part of global near null space vectors */ 5294 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5295 for (k=0;k<nnsp_size;k++) { 5296 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5297 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5298 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5299 } 5300 5301 /* whether or not to skip lapack calls */ 5302 skip_lapack = PETSC_TRUE; 5303 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5304 5305 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5306 if (!skip_lapack) { 5307 PetscScalar temp_work; 5308 5309 #if defined(PETSC_MISSING_LAPACK_GESVD) 5310 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5311 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5312 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5313 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5314 #if defined(PETSC_USE_COMPLEX) 5315 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5316 #endif 5317 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5318 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5319 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5320 lwork = -1; 5321 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5322 #if !defined(PETSC_USE_COMPLEX) 5323 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5324 #else 5325 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5326 #endif 5327 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5328 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5329 #else /* on missing GESVD */ 5330 /* SVD */ 5331 PetscInt max_n,min_n; 5332 max_n = max_size_of_constraint; 5333 min_n = max_constraints; 5334 if (max_size_of_constraint < max_constraints) { 5335 min_n = max_size_of_constraint; 5336 max_n = max_constraints; 5337 } 5338 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5339 #if defined(PETSC_USE_COMPLEX) 5340 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5341 #endif 5342 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5343 lwork = -1; 5344 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5345 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5346 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5347 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5348 #if !defined(PETSC_USE_COMPLEX) 5349 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5350 #else 5351 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5352 #endif 5353 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5354 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5355 #endif /* on missing GESVD */ 5356 /* Allocate optimal workspace */ 5357 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5358 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5359 } 5360 /* Now we can loop on constraining sets */ 5361 total_counts = 0; 5362 constraints_idxs_ptr[0] = 0; 5363 constraints_data_ptr[0] = 0; 5364 /* vertices */ 5365 if (n_vertices) { 5366 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5367 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5368 for (i=0;i<n_vertices;i++) { 5369 constraints_n[total_counts] = 1; 5370 constraints_data[total_counts] = 1.0; 5371 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5372 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5373 total_counts++; 5374 } 5375 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5376 n_vertices = total_counts; 5377 } 5378 5379 /* edges and faces */ 5380 total_counts_cc = total_counts; 5381 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5382 IS used_is; 5383 PetscBool idxs_copied = PETSC_FALSE; 5384 5385 if (ncc<n_ISForEdges) { 5386 used_is = ISForEdges[ncc]; 5387 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5388 } else { 5389 used_is = ISForFaces[ncc-n_ISForEdges]; 5390 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5391 } 5392 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5393 5394 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5395 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5396 /* change of basis should not be performed on local periodic nodes */ 5397 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5398 if (nnsp_has_cnst) { 5399 PetscScalar quad_value; 5400 5401 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5402 idxs_copied = PETSC_TRUE; 5403 5404 if (!pcbddc->use_nnsp_true) { 5405 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5406 } else { 5407 quad_value = 1.0; 5408 } 5409 for (j=0;j<size_of_constraint;j++) { 5410 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5411 } 5412 temp_constraints++; 5413 total_counts++; 5414 } 5415 for (k=0;k<nnsp_size;k++) { 5416 PetscReal real_value; 5417 PetscScalar *ptr_to_data; 5418 5419 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5420 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5421 for (j=0;j<size_of_constraint;j++) { 5422 ptr_to_data[j] = array[is_indices[j]]; 5423 } 5424 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5425 /* check if array is null on the connected component */ 5426 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5427 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5428 if (real_value > 0.0) { /* keep indices and values */ 5429 temp_constraints++; 5430 total_counts++; 5431 if (!idxs_copied) { 5432 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5433 idxs_copied = PETSC_TRUE; 5434 } 5435 } 5436 } 5437 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5438 valid_constraints = temp_constraints; 5439 if (!pcbddc->use_nnsp_true && temp_constraints) { 5440 if (temp_constraints == 1) { /* just normalize the constraint */ 5441 PetscScalar norm,*ptr_to_data; 5442 5443 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5444 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5445 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5446 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5447 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5448 } else { /* perform SVD */ 5449 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5450 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5451 5452 #if defined(PETSC_MISSING_LAPACK_GESVD) 5453 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5454 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5455 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5456 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5457 from that computed using LAPACKgesvd 5458 -> This is due to a different computation of eigenvectors in LAPACKheev 5459 -> The quality of the POD-computed basis will be the same */ 5460 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5461 /* Store upper triangular part of correlation matrix */ 5462 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5463 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5464 for (j=0;j<temp_constraints;j++) { 5465 for (k=0;k<j+1;k++) { 5466 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5467 } 5468 } 5469 /* compute eigenvalues and eigenvectors of correlation matrix */ 5470 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5471 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5472 #if !defined(PETSC_USE_COMPLEX) 5473 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5474 #else 5475 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5476 #endif 5477 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5478 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5479 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5480 j = 0; 5481 while (j < temp_constraints && singular_vals[j] < tol) j++; 5482 total_counts = total_counts-j; 5483 valid_constraints = temp_constraints-j; 5484 /* scale and copy POD basis into used quadrature memory */ 5485 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5486 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5487 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5488 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5489 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5490 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5491 if (j<temp_constraints) { 5492 PetscInt ii; 5493 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5494 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5495 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5496 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5497 for (k=0;k<temp_constraints-j;k++) { 5498 for (ii=0;ii<size_of_constraint;ii++) { 5499 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5500 } 5501 } 5502 } 5503 #else /* on missing GESVD */ 5504 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5505 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5506 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5507 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5508 #if !defined(PETSC_USE_COMPLEX) 5509 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5510 #else 5511 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5512 #endif 5513 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5514 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5515 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5516 k = temp_constraints; 5517 if (k > size_of_constraint) k = size_of_constraint; 5518 j = 0; 5519 while (j < k && singular_vals[k-j-1] < tol) j++; 5520 valid_constraints = k-j; 5521 total_counts = total_counts-temp_constraints+valid_constraints; 5522 #endif /* on missing GESVD */ 5523 } 5524 } 5525 /* update pointers information */ 5526 if (valid_constraints) { 5527 constraints_n[total_counts_cc] = valid_constraints; 5528 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5529 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5530 /* set change_of_basis flag */ 5531 if (boolforchange) { 5532 PetscBTSet(change_basis,total_counts_cc); 5533 } 5534 total_counts_cc++; 5535 } 5536 } 5537 /* free workspace */ 5538 if (!skip_lapack) { 5539 ierr = PetscFree(work);CHKERRQ(ierr); 5540 #if defined(PETSC_USE_COMPLEX) 5541 ierr = PetscFree(rwork);CHKERRQ(ierr); 5542 #endif 5543 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5544 #if defined(PETSC_MISSING_LAPACK_GESVD) 5545 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5546 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5547 #endif 5548 } 5549 for (k=0;k<nnsp_size;k++) { 5550 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5551 } 5552 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5553 /* free index sets of faces, edges and vertices */ 5554 for (i=0;i<n_ISForFaces;i++) { 5555 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5556 } 5557 if (n_ISForFaces) { 5558 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5559 } 5560 for (i=0;i<n_ISForEdges;i++) { 5561 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5562 } 5563 if (n_ISForEdges) { 5564 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5565 } 5566 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5567 } else { 5568 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5569 5570 total_counts = 0; 5571 n_vertices = 0; 5572 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5573 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5574 } 5575 max_constraints = 0; 5576 total_counts_cc = 0; 5577 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5578 total_counts += pcbddc->adaptive_constraints_n[i]; 5579 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5580 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5581 } 5582 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5583 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5584 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5585 constraints_data = pcbddc->adaptive_constraints_data; 5586 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5587 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5588 total_counts_cc = 0; 5589 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5590 if (pcbddc->adaptive_constraints_n[i]) { 5591 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5592 } 5593 } 5594 #if 0 5595 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5596 for (i=0;i<total_counts_cc;i++) { 5597 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5598 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5599 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5600 printf(" %d",constraints_idxs[j]); 5601 } 5602 printf("\n"); 5603 printf("number of cc: %d\n",constraints_n[i]); 5604 } 5605 for (i=0;i<n_vertices;i++) { 5606 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5607 } 5608 for (i=0;i<sub_schurs->n_subs;i++) { 5609 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5610 } 5611 #endif 5612 5613 max_size_of_constraint = 0; 5614 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5615 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5616 /* Change of basis */ 5617 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5618 if (pcbddc->use_change_of_basis) { 5619 for (i=0;i<sub_schurs->n_subs;i++) { 5620 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5621 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5622 } 5623 } 5624 } 5625 } 5626 pcbddc->local_primal_size = total_counts; 5627 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5628 5629 /* map constraints_idxs in boundary numbering */ 5630 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5631 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5632 5633 /* Create constraint matrix */ 5634 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5635 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5636 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5637 5638 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5639 /* determine if a QR strategy is needed for change of basis */ 5640 qr_needed = PETSC_FALSE; 5641 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5642 total_primal_vertices=0; 5643 pcbddc->local_primal_size_cc = 0; 5644 for (i=0;i<total_counts_cc;i++) { 5645 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5646 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5647 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5648 pcbddc->local_primal_size_cc += 1; 5649 } else if (PetscBTLookup(change_basis,i)) { 5650 for (k=0;k<constraints_n[i];k++) { 5651 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5652 } 5653 pcbddc->local_primal_size_cc += constraints_n[i]; 5654 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5655 PetscBTSet(qr_needed_idx,i); 5656 qr_needed = PETSC_TRUE; 5657 } 5658 } else { 5659 pcbddc->local_primal_size_cc += 1; 5660 } 5661 } 5662 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5663 pcbddc->n_vertices = total_primal_vertices; 5664 /* permute indices in order to have a sorted set of vertices */ 5665 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5666 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5667 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5668 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5669 5670 /* nonzero structure of constraint matrix */ 5671 /* and get reference dof for local constraints */ 5672 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5673 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5674 5675 j = total_primal_vertices; 5676 total_counts = total_primal_vertices; 5677 cum = total_primal_vertices; 5678 for (i=n_vertices;i<total_counts_cc;i++) { 5679 if (!PetscBTLookup(change_basis,i)) { 5680 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5681 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5682 cum++; 5683 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5684 for (k=0;k<constraints_n[i];k++) { 5685 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5686 nnz[j+k] = size_of_constraint; 5687 } 5688 j += constraints_n[i]; 5689 } 5690 } 5691 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5692 ierr = PetscFree(nnz);CHKERRQ(ierr); 5693 5694 /* set values in constraint matrix */ 5695 for (i=0;i<total_primal_vertices;i++) { 5696 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5697 } 5698 total_counts = total_primal_vertices; 5699 for (i=n_vertices;i<total_counts_cc;i++) { 5700 if (!PetscBTLookup(change_basis,i)) { 5701 PetscInt *cols; 5702 5703 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5704 cols = constraints_idxs+constraints_idxs_ptr[i]; 5705 for (k=0;k<constraints_n[i];k++) { 5706 PetscInt row = total_counts+k; 5707 PetscScalar *vals; 5708 5709 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5710 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5711 } 5712 total_counts += constraints_n[i]; 5713 } 5714 } 5715 /* assembling */ 5716 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5717 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5718 5719 /* 5720 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5721 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5722 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5723 */ 5724 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5725 if (pcbddc->use_change_of_basis) { 5726 /* dual and primal dofs on a single cc */ 5727 PetscInt dual_dofs,primal_dofs; 5728 /* working stuff for GEQRF */ 5729 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5730 PetscBLASInt lqr_work; 5731 /* working stuff for UNGQR */ 5732 PetscScalar *gqr_work,lgqr_work_t; 5733 PetscBLASInt lgqr_work; 5734 /* working stuff for TRTRS */ 5735 PetscScalar *trs_rhs; 5736 PetscBLASInt Blas_NRHS; 5737 /* pointers for values insertion into change of basis matrix */ 5738 PetscInt *start_rows,*start_cols; 5739 PetscScalar *start_vals; 5740 /* working stuff for values insertion */ 5741 PetscBT is_primal; 5742 PetscInt *aux_primal_numbering_B; 5743 /* matrix sizes */ 5744 PetscInt global_size,local_size; 5745 /* temporary change of basis */ 5746 Mat localChangeOfBasisMatrix; 5747 /* extra space for debugging */ 5748 PetscScalar *dbg_work; 5749 5750 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5751 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5752 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5753 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5754 /* nonzeros for local mat */ 5755 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5756 if (!pcbddc->benign_change || pcbddc->fake_change) { 5757 for (i=0;i<pcis->n;i++) nnz[i]=1; 5758 } else { 5759 const PetscInt *ii; 5760 PetscInt n; 5761 PetscBool flg_row; 5762 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5763 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5764 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5765 } 5766 for (i=n_vertices;i<total_counts_cc;i++) { 5767 if (PetscBTLookup(change_basis,i)) { 5768 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5769 if (PetscBTLookup(qr_needed_idx,i)) { 5770 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5771 } else { 5772 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5773 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5774 } 5775 } 5776 } 5777 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5778 ierr = PetscFree(nnz);CHKERRQ(ierr); 5779 /* Set interior change in the matrix */ 5780 if (!pcbddc->benign_change || pcbddc->fake_change) { 5781 for (i=0;i<pcis->n;i++) { 5782 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5783 } 5784 } else { 5785 const PetscInt *ii,*jj; 5786 PetscScalar *aa; 5787 PetscInt n; 5788 PetscBool flg_row; 5789 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5790 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5791 for (i=0;i<n;i++) { 5792 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5793 } 5794 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5795 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5796 } 5797 5798 if (pcbddc->dbg_flag) { 5799 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5800 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5801 } 5802 5803 5804 /* Now we loop on the constraints which need a change of basis */ 5805 /* 5806 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5807 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5808 5809 Basic blocks of change of basis matrix T computed by 5810 5811 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5812 5813 | 1 0 ... 0 s_1/S | 5814 | 0 1 ... 0 s_2/S | 5815 | ... | 5816 | 0 ... 1 s_{n-1}/S | 5817 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5818 5819 with S = \sum_{i=1}^n s_i^2 5820 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5821 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5822 5823 - QR decomposition of constraints otherwise 5824 */ 5825 if (qr_needed) { 5826 /* space to store Q */ 5827 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5828 /* array to store scaling factors for reflectors */ 5829 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5830 /* first we issue queries for optimal work */ 5831 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5832 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5833 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5834 lqr_work = -1; 5835 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5836 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5837 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5838 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5839 lgqr_work = -1; 5840 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5841 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5842 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5843 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5844 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5845 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5846 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5847 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5848 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5849 /* array to store rhs and solution of triangular solver */ 5850 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5851 /* allocating workspace for check */ 5852 if (pcbddc->dbg_flag) { 5853 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5854 } 5855 } 5856 /* array to store whether a node is primal or not */ 5857 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5858 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5859 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5860 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5861 for (i=0;i<total_primal_vertices;i++) { 5862 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5863 } 5864 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5865 5866 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5867 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5868 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5869 if (PetscBTLookup(change_basis,total_counts)) { 5870 /* get constraint info */ 5871 primal_dofs = constraints_n[total_counts]; 5872 dual_dofs = size_of_constraint-primal_dofs; 5873 5874 if (pcbddc->dbg_flag) { 5875 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5876 } 5877 5878 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5879 5880 /* copy quadrature constraints for change of basis check */ 5881 if (pcbddc->dbg_flag) { 5882 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5883 } 5884 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5885 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5886 5887 /* compute QR decomposition of constraints */ 5888 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5889 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5890 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5891 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5892 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5893 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5894 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5895 5896 /* explictly compute R^-T */ 5897 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5898 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5899 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5900 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5901 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5902 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5903 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5904 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5905 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5906 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5907 5908 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5909 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5910 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5911 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5912 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5913 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5914 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 5915 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 5916 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5917 5918 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 5919 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 5920 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 5921 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5922 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5923 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5924 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5925 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5926 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5927 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5928 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 5929 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5930 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5931 5932 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 5933 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 5934 /* insert cols for primal dofs */ 5935 for (j=0;j<primal_dofs;j++) { 5936 start_vals = &qr_basis[j*size_of_constraint]; 5937 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5938 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5939 } 5940 /* insert cols for dual dofs */ 5941 for (j=0,k=0;j<dual_dofs;k++) { 5942 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 5943 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 5944 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5945 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5946 j++; 5947 } 5948 } 5949 5950 /* check change of basis */ 5951 if (pcbddc->dbg_flag) { 5952 PetscInt ii,jj; 5953 PetscBool valid_qr=PETSC_TRUE; 5954 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 5955 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5956 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 5957 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5958 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 5959 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 5960 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5961 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 5962 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5963 for (jj=0;jj<size_of_constraint;jj++) { 5964 for (ii=0;ii<primal_dofs;ii++) { 5965 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 5966 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 5967 } 5968 } 5969 if (!valid_qr) { 5970 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 5971 for (jj=0;jj<size_of_constraint;jj++) { 5972 for (ii=0;ii<primal_dofs;ii++) { 5973 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 5974 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5975 } 5976 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 5977 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5978 } 5979 } 5980 } 5981 } else { 5982 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 5983 } 5984 } 5985 } else { /* simple transformation block */ 5986 PetscInt row,col; 5987 PetscScalar val,norm; 5988 5989 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5990 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 5991 for (j=0;j<size_of_constraint;j++) { 5992 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 5993 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5994 if (!PetscBTLookup(is_primal,row_B)) { 5995 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 5996 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 5997 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 5998 } else { 5999 for (k=0;k<size_of_constraint;k++) { 6000 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6001 if (row != col) { 6002 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6003 } else { 6004 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6005 } 6006 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6007 } 6008 } 6009 } 6010 if (pcbddc->dbg_flag) { 6011 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6012 } 6013 } 6014 } else { 6015 if (pcbddc->dbg_flag) { 6016 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6017 } 6018 } 6019 } 6020 6021 /* free workspace */ 6022 if (qr_needed) { 6023 if (pcbddc->dbg_flag) { 6024 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6025 } 6026 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6027 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6028 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6029 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6030 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6031 } 6032 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6033 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6034 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6035 6036 /* assembling of global change of variable */ 6037 if (!pcbddc->fake_change) { 6038 Mat tmat; 6039 PetscInt bs; 6040 6041 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6042 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6043 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6044 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6045 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6046 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6047 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6048 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6049 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6050 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6051 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6052 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6053 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6054 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6055 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6056 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6057 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6058 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6059 6060 /* check */ 6061 if (pcbddc->dbg_flag) { 6062 PetscReal error; 6063 Vec x,x_change; 6064 6065 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6066 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6067 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6068 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6069 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6070 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6071 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6072 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6073 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6074 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6075 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6076 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6077 if (error > PETSC_SMALL) { 6078 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6079 } 6080 ierr = VecDestroy(&x);CHKERRQ(ierr); 6081 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6082 } 6083 /* adapt sub_schurs computed (if any) */ 6084 if (pcbddc->use_deluxe_scaling) { 6085 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6086 6087 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 6088 if (sub_schurs && sub_schurs->S_Ej_all) { 6089 Mat S_new,tmat; 6090 IS is_all_N,is_V_Sall = NULL; 6091 6092 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6093 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6094 if (pcbddc->deluxe_zerorows) { 6095 ISLocalToGlobalMapping NtoSall; 6096 IS is_V; 6097 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6098 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6099 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6100 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6101 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6102 } 6103 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6104 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6105 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6106 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6107 if (pcbddc->deluxe_zerorows) { 6108 const PetscScalar *array; 6109 const PetscInt *idxs_V,*idxs_all; 6110 PetscInt i,n_V; 6111 6112 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6113 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6114 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6115 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6116 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6117 for (i=0;i<n_V;i++) { 6118 PetscScalar val; 6119 PetscInt idx; 6120 6121 idx = idxs_V[i]; 6122 val = array[idxs_all[idxs_V[i]]]; 6123 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6124 } 6125 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6126 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6127 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6128 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6129 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6130 } 6131 sub_schurs->S_Ej_all = S_new; 6132 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6133 if (sub_schurs->sum_S_Ej_all) { 6134 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6135 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6136 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6137 if (pcbddc->deluxe_zerorows) { 6138 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6139 } 6140 sub_schurs->sum_S_Ej_all = S_new; 6141 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6142 } 6143 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6144 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6145 } 6146 /* destroy any change of basis context in sub_schurs */ 6147 if (sub_schurs && sub_schurs->change) { 6148 PetscInt i; 6149 6150 for (i=0;i<sub_schurs->n_subs;i++) { 6151 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6152 } 6153 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6154 } 6155 } 6156 if (pcbddc->switch_static) { /* need to save the local change */ 6157 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6158 } else { 6159 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6160 } 6161 /* determine if any process has changed the pressures locally */ 6162 pcbddc->change_interior = pcbddc->benign_have_null; 6163 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6164 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6165 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6166 pcbddc->use_qr_single = qr_needed; 6167 } 6168 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6169 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6170 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6171 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6172 } else { 6173 Mat benign_global = NULL; 6174 if (pcbddc->benign_have_null) { 6175 Mat tmat; 6176 6177 pcbddc->change_interior = PETSC_TRUE; 6178 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6179 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6180 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6181 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6182 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6183 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6184 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6185 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6186 if (pcbddc->benign_change) { 6187 Mat M; 6188 6189 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6190 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6191 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6192 ierr = MatDestroy(&M);CHKERRQ(ierr); 6193 } else { 6194 Mat eye; 6195 PetscScalar *array; 6196 6197 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6198 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6199 for (i=0;i<pcis->n;i++) { 6200 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6201 } 6202 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6203 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6204 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6205 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6206 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6207 } 6208 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6209 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6210 } 6211 if (pcbddc->user_ChangeOfBasisMatrix) { 6212 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6213 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6214 } else if (pcbddc->benign_have_null) { 6215 pcbddc->ChangeOfBasisMatrix = benign_global; 6216 } 6217 } 6218 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6219 IS is_global; 6220 const PetscInt *gidxs; 6221 6222 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6223 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6224 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6225 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6226 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6227 } 6228 } 6229 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6230 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6231 } 6232 6233 if (!pcbddc->fake_change) { 6234 /* add pressure dofs to set of primal nodes for numbering purposes */ 6235 for (i=0;i<pcbddc->benign_n;i++) { 6236 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6237 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6238 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6239 pcbddc->local_primal_size_cc++; 6240 pcbddc->local_primal_size++; 6241 } 6242 6243 /* check if a new primal space has been introduced (also take into account benign trick) */ 6244 pcbddc->new_primal_space_local = PETSC_TRUE; 6245 if (olocal_primal_size == pcbddc->local_primal_size) { 6246 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6247 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6248 if (!pcbddc->new_primal_space_local) { 6249 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6250 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6251 } 6252 } 6253 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6254 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6255 } 6256 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6257 6258 /* flush dbg viewer */ 6259 if (pcbddc->dbg_flag) { 6260 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6261 } 6262 6263 /* free workspace */ 6264 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6265 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6266 if (!pcbddc->adaptive_selection) { 6267 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6268 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6269 } else { 6270 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6271 pcbddc->adaptive_constraints_idxs_ptr, 6272 pcbddc->adaptive_constraints_data_ptr, 6273 pcbddc->adaptive_constraints_idxs, 6274 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6275 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6276 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6277 } 6278 PetscFunctionReturn(0); 6279 } 6280 6281 #undef __FUNCT__ 6282 #define __FUNCT__ "PCBDDCAnalyzeInterface" 6283 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6284 { 6285 ISLocalToGlobalMapping map; 6286 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6287 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6288 PetscInt ierr,i,N; 6289 6290 PetscFunctionBegin; 6291 if (pcbddc->recompute_topography) { 6292 pcbddc->graphanalyzed = PETSC_FALSE; 6293 /* Reset previously computed graph */ 6294 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6295 /* Init local Graph struct */ 6296 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6297 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6298 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6299 6300 /* Check validity of the csr graph passed in by the user */ 6301 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6302 6303 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6304 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 6305 PetscInt *xadj,*adjncy; 6306 PetscInt nvtxs; 6307 PetscBool flg_row=PETSC_FALSE; 6308 6309 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6310 if (flg_row) { 6311 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6312 pcbddc->computed_rowadj = PETSC_TRUE; 6313 } 6314 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6315 } 6316 if (pcbddc->dbg_flag) { 6317 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6318 } 6319 6320 /* Setup of Graph */ 6321 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6322 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6323 6324 /* attach info on disconnected subdomains if present */ 6325 if (pcbddc->n_local_subs) { 6326 PetscInt *local_subs; 6327 6328 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6329 for (i=0;i<pcbddc->n_local_subs;i++) { 6330 const PetscInt *idxs; 6331 PetscInt nl,j; 6332 6333 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6334 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6335 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6336 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6337 } 6338 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6339 pcbddc->mat_graph->local_subs = local_subs; 6340 } 6341 } 6342 6343 if (!pcbddc->graphanalyzed) { 6344 /* Graph's connected components analysis */ 6345 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6346 pcbddc->graphanalyzed = PETSC_TRUE; 6347 } 6348 PetscFunctionReturn(0); 6349 } 6350 6351 #undef __FUNCT__ 6352 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 6353 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6354 { 6355 PetscInt i,j; 6356 PetscScalar *alphas; 6357 PetscErrorCode ierr; 6358 6359 PetscFunctionBegin; 6360 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6361 for (i=0;i<n;i++) { 6362 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6363 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6364 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6365 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6366 } 6367 ierr = PetscFree(alphas);CHKERRQ(ierr); 6368 PetscFunctionReturn(0); 6369 } 6370 6371 #undef __FUNCT__ 6372 #define __FUNCT__ "PCBDDCMatISGetSubassemblingPattern" 6373 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6374 { 6375 Mat A; 6376 PetscInt n_neighs,*neighs,*n_shared,**shared; 6377 PetscMPIInt size,rank,color; 6378 PetscInt *xadj,*adjncy; 6379 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6380 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 6381 PetscInt void_procs,*procs_candidates = NULL; 6382 PetscInt xadj_count,*count; 6383 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6384 PetscSubcomm psubcomm; 6385 MPI_Comm subcomm; 6386 PetscErrorCode ierr; 6387 6388 PetscFunctionBegin; 6389 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6390 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6391 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6392 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6393 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6394 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6395 6396 if (have_void) *have_void = PETSC_FALSE; 6397 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6398 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6399 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6400 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6401 im_active = !!n; 6402 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6403 void_procs = size - active_procs; 6404 /* get ranks of of non-active processes in mat communicator */ 6405 if (void_procs) { 6406 PetscInt ncand; 6407 6408 if (have_void) *have_void = PETSC_TRUE; 6409 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6410 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6411 for (i=0,ncand=0;i<size;i++) { 6412 if (!procs_candidates[i]) { 6413 procs_candidates[ncand++] = i; 6414 } 6415 } 6416 /* force n_subdomains to be not greater that the number of non-active processes */ 6417 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6418 } 6419 6420 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 6421 number of subdomains requested 1 -> send to master or first candidate in voids */ 6422 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 6423 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 6424 PetscInt issize,isidx,dest; 6425 if (*n_subdomains == 1) dest = 0; 6426 else dest = rank; 6427 if (im_active) { 6428 issize = 1; 6429 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6430 isidx = procs_candidates[dest]; 6431 } else { 6432 isidx = dest; 6433 } 6434 } else { 6435 issize = 0; 6436 isidx = -1; 6437 } 6438 if (*n_subdomains != 1) *n_subdomains = active_procs; 6439 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6440 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6441 PetscFunctionReturn(0); 6442 } 6443 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6444 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6445 threshold = PetscMax(threshold,2); 6446 6447 /* Get info on mapping */ 6448 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6449 6450 /* build local CSR graph of subdomains' connectivity */ 6451 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6452 xadj[0] = 0; 6453 xadj[1] = PetscMax(n_neighs-1,0); 6454 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6455 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6456 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 6457 for (i=1;i<n_neighs;i++) 6458 for (j=0;j<n_shared[i];j++) 6459 count[shared[i][j]] += 1; 6460 6461 xadj_count = 0; 6462 for (i=1;i<n_neighs;i++) { 6463 for (j=0;j<n_shared[i];j++) { 6464 if (count[shared[i][j]] < threshold) { 6465 adjncy[xadj_count] = neighs[i]; 6466 adjncy_wgt[xadj_count] = n_shared[i]; 6467 xadj_count++; 6468 break; 6469 } 6470 } 6471 } 6472 xadj[1] = xadj_count; 6473 ierr = PetscFree(count);CHKERRQ(ierr); 6474 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6475 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6476 6477 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6478 6479 /* Restrict work on active processes only */ 6480 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6481 if (void_procs) { 6482 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6483 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6484 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6485 subcomm = PetscSubcommChild(psubcomm); 6486 } else { 6487 psubcomm = NULL; 6488 subcomm = PetscObjectComm((PetscObject)mat); 6489 } 6490 6491 v_wgt = NULL; 6492 if (!color) { 6493 ierr = PetscFree(xadj);CHKERRQ(ierr); 6494 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6495 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6496 } else { 6497 Mat subdomain_adj; 6498 IS new_ranks,new_ranks_contig; 6499 MatPartitioning partitioner; 6500 PetscInt rstart=0,rend=0; 6501 PetscInt *is_indices,*oldranks; 6502 PetscMPIInt size; 6503 PetscBool aggregate; 6504 6505 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6506 if (void_procs) { 6507 PetscInt prank = rank; 6508 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6509 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6510 for (i=0;i<xadj[1];i++) { 6511 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6512 } 6513 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6514 } else { 6515 oldranks = NULL; 6516 } 6517 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6518 if (aggregate) { /* TODO: all this part could be made more efficient */ 6519 PetscInt lrows,row,ncols,*cols; 6520 PetscMPIInt nrank; 6521 PetscScalar *vals; 6522 6523 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6524 lrows = 0; 6525 if (nrank<redprocs) { 6526 lrows = size/redprocs; 6527 if (nrank<size%redprocs) lrows++; 6528 } 6529 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6530 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6531 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6532 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6533 row = nrank; 6534 ncols = xadj[1]-xadj[0]; 6535 cols = adjncy; 6536 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6537 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6538 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6539 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6540 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6541 ierr = PetscFree(xadj);CHKERRQ(ierr); 6542 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6543 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6544 ierr = PetscFree(vals);CHKERRQ(ierr); 6545 if (use_vwgt) { 6546 Vec v; 6547 const PetscScalar *array; 6548 PetscInt nl; 6549 6550 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6551 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 6552 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6553 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6554 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6555 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6556 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6557 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6558 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6559 ierr = VecDestroy(&v);CHKERRQ(ierr); 6560 } 6561 } else { 6562 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6563 if (use_vwgt) { 6564 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6565 v_wgt[0] = n; 6566 } 6567 } 6568 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6569 6570 /* Partition */ 6571 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6572 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6573 if (v_wgt) { 6574 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6575 } 6576 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6577 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6578 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6579 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6580 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6581 6582 /* renumber new_ranks to avoid "holes" in new set of processors */ 6583 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6584 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6585 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6586 if (!aggregate) { 6587 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6588 #if defined(PETSC_USE_DEBUG) 6589 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6590 #endif 6591 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6592 } else if (oldranks) { 6593 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6594 } else { 6595 ranks_send_to_idx[0] = is_indices[0]; 6596 } 6597 } else { 6598 PetscInt idxs[1]; 6599 PetscMPIInt tag; 6600 MPI_Request *reqs; 6601 6602 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6603 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6604 for (i=rstart;i<rend;i++) { 6605 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6606 } 6607 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6608 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6609 ierr = PetscFree(reqs);CHKERRQ(ierr); 6610 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6611 #if defined(PETSC_USE_DEBUG) 6612 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6613 #endif 6614 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6615 } else if (oldranks) { 6616 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6617 } else { 6618 ranks_send_to_idx[0] = idxs[0]; 6619 } 6620 } 6621 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6622 /* clean up */ 6623 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6624 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6625 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6626 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6627 } 6628 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6629 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6630 6631 /* assemble parallel IS for sends */ 6632 i = 1; 6633 if (!color) i=0; 6634 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6635 PetscFunctionReturn(0); 6636 } 6637 6638 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6639 6640 #undef __FUNCT__ 6641 #define __FUNCT__ "PCBDDCMatISSubassemble" 6642 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6643 { 6644 Mat local_mat; 6645 IS is_sends_internal; 6646 PetscInt rows,cols,new_local_rows; 6647 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6648 PetscBool ismatis,isdense,newisdense,destroy_mat; 6649 ISLocalToGlobalMapping l2gmap; 6650 PetscInt* l2gmap_indices; 6651 const PetscInt* is_indices; 6652 MatType new_local_type; 6653 /* buffers */ 6654 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6655 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6656 PetscInt *recv_buffer_idxs_local; 6657 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6658 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6659 /* MPI */ 6660 MPI_Comm comm,comm_n; 6661 PetscSubcomm subcomm; 6662 PetscMPIInt n_sends,n_recvs,commsize; 6663 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6664 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6665 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6666 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6667 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6668 PetscErrorCode ierr; 6669 6670 PetscFunctionBegin; 6671 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6672 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6673 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6674 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6675 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6676 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6677 PetscValidLogicalCollectiveBool(mat,reuse,6); 6678 PetscValidLogicalCollectiveInt(mat,nis,8); 6679 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6680 if (nvecs) { 6681 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6682 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6683 } 6684 /* further checks */ 6685 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6686 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6687 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6688 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6689 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6690 if (reuse && *mat_n) { 6691 PetscInt mrows,mcols,mnrows,mncols; 6692 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6693 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6694 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6695 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6696 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6697 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6698 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6699 } 6700 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6701 PetscValidLogicalCollectiveInt(mat,bs,0); 6702 6703 /* prepare IS for sending if not provided */ 6704 if (!is_sends) { 6705 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6706 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6707 } else { 6708 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6709 is_sends_internal = is_sends; 6710 } 6711 6712 /* get comm */ 6713 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6714 6715 /* compute number of sends */ 6716 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6717 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6718 6719 /* compute number of receives */ 6720 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6721 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6722 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6723 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6724 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6725 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6726 ierr = PetscFree(iflags);CHKERRQ(ierr); 6727 6728 /* restrict comm if requested */ 6729 subcomm = 0; 6730 destroy_mat = PETSC_FALSE; 6731 if (restrict_comm) { 6732 PetscMPIInt color,subcommsize; 6733 6734 color = 0; 6735 if (restrict_full) { 6736 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6737 } else { 6738 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6739 } 6740 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6741 subcommsize = commsize - subcommsize; 6742 /* check if reuse has been requested */ 6743 if (reuse) { 6744 if (*mat_n) { 6745 PetscMPIInt subcommsize2; 6746 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6747 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6748 comm_n = PetscObjectComm((PetscObject)*mat_n); 6749 } else { 6750 comm_n = PETSC_COMM_SELF; 6751 } 6752 } else { /* MAT_INITIAL_MATRIX */ 6753 PetscMPIInt rank; 6754 6755 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6756 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6757 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6758 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6759 comm_n = PetscSubcommChild(subcomm); 6760 } 6761 /* flag to destroy *mat_n if not significative */ 6762 if (color) destroy_mat = PETSC_TRUE; 6763 } else { 6764 comm_n = comm; 6765 } 6766 6767 /* prepare send/receive buffers */ 6768 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6769 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6770 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6771 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6772 if (nis) { 6773 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6774 } 6775 6776 /* Get data from local matrices */ 6777 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6778 /* TODO: See below some guidelines on how to prepare the local buffers */ 6779 /* 6780 send_buffer_vals should contain the raw values of the local matrix 6781 send_buffer_idxs should contain: 6782 - MatType_PRIVATE type 6783 - PetscInt size_of_l2gmap 6784 - PetscInt global_row_indices[size_of_l2gmap] 6785 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6786 */ 6787 else { 6788 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6789 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6790 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6791 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6792 send_buffer_idxs[1] = i; 6793 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6794 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6795 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6796 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6797 for (i=0;i<n_sends;i++) { 6798 ilengths_vals[is_indices[i]] = len*len; 6799 ilengths_idxs[is_indices[i]] = len+2; 6800 } 6801 } 6802 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6803 /* additional is (if any) */ 6804 if (nis) { 6805 PetscMPIInt psum; 6806 PetscInt j; 6807 for (j=0,psum=0;j<nis;j++) { 6808 PetscInt plen; 6809 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6810 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6811 psum += len+1; /* indices + lenght */ 6812 } 6813 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6814 for (j=0,psum=0;j<nis;j++) { 6815 PetscInt plen; 6816 const PetscInt *is_array_idxs; 6817 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6818 send_buffer_idxs_is[psum] = plen; 6819 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6820 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6821 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6822 psum += plen+1; /* indices + lenght */ 6823 } 6824 for (i=0;i<n_sends;i++) { 6825 ilengths_idxs_is[is_indices[i]] = psum; 6826 } 6827 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6828 } 6829 6830 buf_size_idxs = 0; 6831 buf_size_vals = 0; 6832 buf_size_idxs_is = 0; 6833 buf_size_vecs = 0; 6834 for (i=0;i<n_recvs;i++) { 6835 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6836 buf_size_vals += (PetscInt)olengths_vals[i]; 6837 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6838 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6839 } 6840 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6841 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6842 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6843 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6844 6845 /* get new tags for clean communications */ 6846 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6847 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6848 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6849 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6850 6851 /* allocate for requests */ 6852 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6853 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6854 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6855 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6856 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6857 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6858 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6859 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6860 6861 /* communications */ 6862 ptr_idxs = recv_buffer_idxs; 6863 ptr_vals = recv_buffer_vals; 6864 ptr_idxs_is = recv_buffer_idxs_is; 6865 ptr_vecs = recv_buffer_vecs; 6866 for (i=0;i<n_recvs;i++) { 6867 source_dest = onodes[i]; 6868 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6869 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6870 ptr_idxs += olengths_idxs[i]; 6871 ptr_vals += olengths_vals[i]; 6872 if (nis) { 6873 source_dest = onodes_is[i]; 6874 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6875 ptr_idxs_is += olengths_idxs_is[i]; 6876 } 6877 if (nvecs) { 6878 source_dest = onodes[i]; 6879 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6880 ptr_vecs += olengths_idxs[i]-2; 6881 } 6882 } 6883 for (i=0;i<n_sends;i++) { 6884 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6885 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6886 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6887 if (nis) { 6888 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6889 } 6890 if (nvecs) { 6891 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6892 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6893 } 6894 } 6895 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6896 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6897 6898 /* assemble new l2g map */ 6899 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6900 ptr_idxs = recv_buffer_idxs; 6901 new_local_rows = 0; 6902 for (i=0;i<n_recvs;i++) { 6903 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6904 ptr_idxs += olengths_idxs[i]; 6905 } 6906 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6907 ptr_idxs = recv_buffer_idxs; 6908 new_local_rows = 0; 6909 for (i=0;i<n_recvs;i++) { 6910 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6911 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6912 ptr_idxs += olengths_idxs[i]; 6913 } 6914 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 6915 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 6916 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 6917 6918 /* infer new local matrix type from received local matrices type */ 6919 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 6920 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 6921 if (n_recvs) { 6922 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 6923 ptr_idxs = recv_buffer_idxs; 6924 for (i=0;i<n_recvs;i++) { 6925 if ((PetscInt)new_local_type_private != *ptr_idxs) { 6926 new_local_type_private = MATAIJ_PRIVATE; 6927 break; 6928 } 6929 ptr_idxs += olengths_idxs[i]; 6930 } 6931 switch (new_local_type_private) { 6932 case MATDENSE_PRIVATE: 6933 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 6934 new_local_type = MATSEQAIJ; 6935 bs = 1; 6936 } else { /* if I receive only 1 dense matrix */ 6937 new_local_type = MATSEQDENSE; 6938 bs = 1; 6939 } 6940 break; 6941 case MATAIJ_PRIVATE: 6942 new_local_type = MATSEQAIJ; 6943 bs = 1; 6944 break; 6945 case MATBAIJ_PRIVATE: 6946 new_local_type = MATSEQBAIJ; 6947 break; 6948 case MATSBAIJ_PRIVATE: 6949 new_local_type = MATSEQSBAIJ; 6950 break; 6951 default: 6952 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 6953 break; 6954 } 6955 } else { /* by default, new_local_type is seqdense */ 6956 new_local_type = MATSEQDENSE; 6957 bs = 1; 6958 } 6959 6960 /* create MATIS object if needed */ 6961 if (!reuse) { 6962 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 6963 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6964 } else { 6965 /* it also destroys the local matrices */ 6966 if (*mat_n) { 6967 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 6968 } else { /* this is a fake object */ 6969 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6970 } 6971 } 6972 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 6973 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 6974 6975 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6976 6977 /* Global to local map of received indices */ 6978 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 6979 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 6980 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 6981 6982 /* restore attributes -> type of incoming data and its size */ 6983 buf_size_idxs = 0; 6984 for (i=0;i<n_recvs;i++) { 6985 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 6986 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 6987 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6988 } 6989 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 6990 6991 /* set preallocation */ 6992 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 6993 if (!newisdense) { 6994 PetscInt *new_local_nnz=0; 6995 6996 ptr_idxs = recv_buffer_idxs_local; 6997 if (n_recvs) { 6998 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 6999 } 7000 for (i=0;i<n_recvs;i++) { 7001 PetscInt j; 7002 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7003 for (j=0;j<*(ptr_idxs+1);j++) { 7004 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7005 } 7006 } else { 7007 /* TODO */ 7008 } 7009 ptr_idxs += olengths_idxs[i]; 7010 } 7011 if (new_local_nnz) { 7012 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7013 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7014 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7015 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7016 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7017 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7018 } else { 7019 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7020 } 7021 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7022 } else { 7023 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7024 } 7025 7026 /* set values */ 7027 ptr_vals = recv_buffer_vals; 7028 ptr_idxs = recv_buffer_idxs_local; 7029 for (i=0;i<n_recvs;i++) { 7030 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7031 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7032 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7033 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7034 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7035 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7036 } else { 7037 /* TODO */ 7038 } 7039 ptr_idxs += olengths_idxs[i]; 7040 ptr_vals += olengths_vals[i]; 7041 } 7042 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7043 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7044 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7045 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7046 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7047 7048 #if 0 7049 if (!restrict_comm) { /* check */ 7050 Vec lvec,rvec; 7051 PetscReal infty_error; 7052 7053 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7054 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7055 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7056 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7057 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7058 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7059 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7060 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7061 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7062 } 7063 #endif 7064 7065 /* assemble new additional is (if any) */ 7066 if (nis) { 7067 PetscInt **temp_idxs,*count_is,j,psum; 7068 7069 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7070 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7071 ptr_idxs = recv_buffer_idxs_is; 7072 psum = 0; 7073 for (i=0;i<n_recvs;i++) { 7074 for (j=0;j<nis;j++) { 7075 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7076 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7077 psum += plen; 7078 ptr_idxs += plen+1; /* shift pointer to received data */ 7079 } 7080 } 7081 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7082 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7083 for (i=1;i<nis;i++) { 7084 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7085 } 7086 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7087 ptr_idxs = recv_buffer_idxs_is; 7088 for (i=0;i<n_recvs;i++) { 7089 for (j=0;j<nis;j++) { 7090 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7091 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7092 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7093 ptr_idxs += plen+1; /* shift pointer to received data */ 7094 } 7095 } 7096 for (i=0;i<nis;i++) { 7097 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7098 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7099 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7100 } 7101 ierr = PetscFree(count_is);CHKERRQ(ierr); 7102 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7103 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7104 } 7105 /* free workspace */ 7106 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7107 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7108 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7109 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7110 if (isdense) { 7111 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7112 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7113 } else { 7114 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7115 } 7116 if (nis) { 7117 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7118 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7119 } 7120 7121 if (nvecs) { 7122 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7123 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7124 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7125 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7126 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7127 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7128 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7129 /* set values */ 7130 ptr_vals = recv_buffer_vecs; 7131 ptr_idxs = recv_buffer_idxs_local; 7132 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7133 for (i=0;i<n_recvs;i++) { 7134 PetscInt j; 7135 for (j=0;j<*(ptr_idxs+1);j++) { 7136 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7137 } 7138 ptr_idxs += olengths_idxs[i]; 7139 ptr_vals += olengths_idxs[i]-2; 7140 } 7141 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7142 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7143 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7144 } 7145 7146 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7147 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7148 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7149 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7150 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7151 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7152 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7153 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7154 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7155 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7156 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7157 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7158 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7159 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7160 ierr = PetscFree(onodes);CHKERRQ(ierr); 7161 if (nis) { 7162 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7163 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7164 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7165 } 7166 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7167 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7168 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7169 for (i=0;i<nis;i++) { 7170 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7171 } 7172 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7173 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7174 } 7175 *mat_n = NULL; 7176 } 7177 PetscFunctionReturn(0); 7178 } 7179 7180 /* temporary hack into ksp private data structure */ 7181 #include <petsc/private/kspimpl.h> 7182 7183 #undef __FUNCT__ 7184 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 7185 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7186 { 7187 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7188 PC_IS *pcis = (PC_IS*)pc->data; 7189 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7190 Mat coarsedivudotp = NULL; 7191 Mat coarseG,t_coarse_mat_is; 7192 MatNullSpace CoarseNullSpace = NULL; 7193 ISLocalToGlobalMapping coarse_islg; 7194 IS coarse_is,*isarray; 7195 PetscInt i,im_active=-1,active_procs=-1; 7196 PetscInt nis,nisdofs,nisneu,nisvert; 7197 PC pc_temp; 7198 PCType coarse_pc_type; 7199 KSPType coarse_ksp_type; 7200 PetscBool multilevel_requested,multilevel_allowed; 7201 PetscBool isredundant,isbddc,isnn,coarse_reuse; 7202 PetscInt ncoarse,nedcfield; 7203 PetscBool compute_vecs = PETSC_FALSE; 7204 PetscScalar *array; 7205 MatReuse coarse_mat_reuse; 7206 PetscBool restr, full_restr, have_void; 7207 PetscErrorCode ierr; 7208 7209 PetscFunctionBegin; 7210 /* Assign global numbering to coarse dofs */ 7211 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7212 PetscInt ocoarse_size; 7213 compute_vecs = PETSC_TRUE; 7214 ocoarse_size = pcbddc->coarse_size; 7215 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7216 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7217 /* see if we can avoid some work */ 7218 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7219 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7220 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7221 PC pc; 7222 PetscBool isbddc; 7223 7224 /* temporary workaround since PCBDDC does not have a reset method so far */ 7225 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 7226 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 7227 if (isbddc) { 7228 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 7229 } else { 7230 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7231 } 7232 coarse_reuse = PETSC_FALSE; 7233 } else { /* we can safely reuse already computed coarse matrix */ 7234 coarse_reuse = PETSC_TRUE; 7235 } 7236 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7237 coarse_reuse = PETSC_FALSE; 7238 } 7239 /* reset any subassembling information */ 7240 if (!coarse_reuse || pcbddc->recompute_topography) { 7241 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7242 } 7243 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7244 coarse_reuse = PETSC_TRUE; 7245 } 7246 /* assemble coarse matrix */ 7247 if (coarse_reuse && pcbddc->coarse_ksp) { 7248 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7249 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7250 coarse_mat_reuse = MAT_REUSE_MATRIX; 7251 } else { 7252 coarse_mat = NULL; 7253 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7254 } 7255 7256 /* creates temporary l2gmap and IS for coarse indexes */ 7257 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7258 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7259 7260 /* creates temporary MATIS object for coarse matrix */ 7261 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7262 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7263 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7264 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7265 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7266 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7267 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7268 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7269 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7270 7271 /* count "active" (i.e. with positive local size) and "void" processes */ 7272 im_active = !!(pcis->n); 7273 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7274 7275 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7276 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7277 /* full_restr : just use the receivers from the subassembling pattern */ 7278 coarse_mat_is = NULL; 7279 multilevel_allowed = PETSC_FALSE; 7280 multilevel_requested = PETSC_FALSE; 7281 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7282 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7283 if (multilevel_requested) { 7284 ncoarse = active_procs/pcbddc->coarsening_ratio; 7285 restr = PETSC_FALSE; 7286 full_restr = PETSC_FALSE; 7287 } else { 7288 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7289 restr = PETSC_TRUE; 7290 full_restr = PETSC_TRUE; 7291 } 7292 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7293 ncoarse = PetscMax(1,ncoarse); 7294 if (!pcbddc->coarse_subassembling) { 7295 if (pcbddc->coarsening_ratio > 1) { 7296 if (multilevel_requested) { 7297 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7298 } else { 7299 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7300 } 7301 } else { 7302 PetscMPIInt size,rank; 7303 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 7304 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7305 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 7306 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7307 } 7308 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7309 PetscInt psum; 7310 PetscMPIInt size; 7311 if (pcbddc->coarse_ksp) psum = 1; 7312 else psum = 0; 7313 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7314 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 7315 if (ncoarse < size) have_void = PETSC_TRUE; 7316 } 7317 /* determine if we can go multilevel */ 7318 if (multilevel_requested) { 7319 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7320 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7321 } 7322 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7323 7324 /* dump subassembling pattern */ 7325 if (pcbddc->dbg_flag && multilevel_allowed) { 7326 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7327 } 7328 7329 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7330 nedcfield = -1; 7331 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7332 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7333 const PetscInt *idxs; 7334 ISLocalToGlobalMapping tmap; 7335 7336 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7337 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7338 /* allocate space for temporary storage */ 7339 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7340 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7341 /* allocate for IS array */ 7342 nisdofs = pcbddc->n_ISForDofsLocal; 7343 if (pcbddc->nedclocal) { 7344 if (pcbddc->nedfield > -1) { 7345 nedcfield = pcbddc->nedfield; 7346 } else { 7347 nedcfield = 0; 7348 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7349 nisdofs = 1; 7350 } 7351 } 7352 nisneu = !!pcbddc->NeumannBoundariesLocal; 7353 nisvert = 0; /* nisvert is not used */ 7354 nis = nisdofs + nisneu + nisvert; 7355 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7356 /* dofs splitting */ 7357 for (i=0;i<nisdofs;i++) { 7358 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7359 if (nedcfield != i) { 7360 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7361 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7362 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7363 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7364 } else { 7365 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7366 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7367 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7368 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7369 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7370 } 7371 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7372 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7373 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7374 } 7375 /* neumann boundaries */ 7376 if (pcbddc->NeumannBoundariesLocal) { 7377 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7378 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7379 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7380 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7381 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7382 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7383 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7384 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7385 } 7386 /* free memory */ 7387 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7388 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7389 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7390 } else { 7391 nis = 0; 7392 nisdofs = 0; 7393 nisneu = 0; 7394 nisvert = 0; 7395 isarray = NULL; 7396 } 7397 /* destroy no longer needed map */ 7398 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7399 7400 /* subassemble */ 7401 if (multilevel_allowed) { 7402 Vec vp[1]; 7403 PetscInt nvecs = 0; 7404 PetscBool reuse,reuser; 7405 7406 if (coarse_mat) reuse = PETSC_TRUE; 7407 else reuse = PETSC_FALSE; 7408 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7409 vp[0] = NULL; 7410 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7411 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7412 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7413 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7414 nvecs = 1; 7415 7416 if (pcbddc->divudotp) { 7417 Mat B,loc_divudotp; 7418 Vec v,p; 7419 IS dummy; 7420 PetscInt np; 7421 7422 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7423 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7424 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7425 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7426 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7427 ierr = VecSet(p,1.);CHKERRQ(ierr); 7428 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7429 ierr = VecDestroy(&p);CHKERRQ(ierr); 7430 ierr = MatDestroy(&B);CHKERRQ(ierr); 7431 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7432 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7433 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7434 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7435 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7436 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7437 ierr = VecDestroy(&v);CHKERRQ(ierr); 7438 } 7439 } 7440 if (reuser) { 7441 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7442 } else { 7443 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7444 } 7445 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7446 PetscScalar *arraym,*arrayv; 7447 PetscInt nl; 7448 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7449 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7450 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7451 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7452 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7453 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7454 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7455 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7456 } else { 7457 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7458 } 7459 } else { 7460 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7461 } 7462 if (coarse_mat_is || coarse_mat) { 7463 PetscMPIInt size; 7464 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7465 if (!multilevel_allowed) { 7466 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7467 } else { 7468 Mat A; 7469 7470 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7471 if (coarse_mat_is) { 7472 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7473 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7474 coarse_mat = coarse_mat_is; 7475 } 7476 /* be sure we don't have MatSeqDENSE as local mat */ 7477 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7478 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7479 } 7480 } 7481 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7482 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7483 7484 /* create local to global scatters for coarse problem */ 7485 if (compute_vecs) { 7486 PetscInt lrows; 7487 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7488 if (coarse_mat) { 7489 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7490 } else { 7491 lrows = 0; 7492 } 7493 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7494 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7495 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7496 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7497 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7498 } 7499 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7500 7501 /* set defaults for coarse KSP and PC */ 7502 if (multilevel_allowed) { 7503 coarse_ksp_type = KSPRICHARDSON; 7504 coarse_pc_type = PCBDDC; 7505 } else { 7506 coarse_ksp_type = KSPPREONLY; 7507 coarse_pc_type = PCREDUNDANT; 7508 } 7509 7510 /* print some info if requested */ 7511 if (pcbddc->dbg_flag) { 7512 if (!multilevel_allowed) { 7513 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7514 if (multilevel_requested) { 7515 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7516 } else if (pcbddc->max_levels) { 7517 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7518 } 7519 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7520 } 7521 } 7522 7523 /* communicate coarse discrete gradient */ 7524 coarseG = NULL; 7525 if (pcbddc->nedcG && multilevel_allowed) { 7526 MPI_Comm ccomm; 7527 if (coarse_mat) { 7528 ccomm = PetscObjectComm((PetscObject)coarse_mat); 7529 } else { 7530 ccomm = MPI_COMM_NULL; 7531 } 7532 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 7533 } 7534 7535 /* create the coarse KSP object only once with defaults */ 7536 if (coarse_mat) { 7537 PetscViewer dbg_viewer = NULL; 7538 if (pcbddc->dbg_flag) { 7539 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7540 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7541 } 7542 if (!pcbddc->coarse_ksp) { 7543 char prefix[256],str_level[16]; 7544 size_t len; 7545 7546 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7547 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7548 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7549 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7550 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7551 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7552 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7553 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7554 /* TODO is this logic correct? should check for coarse_mat type */ 7555 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7556 /* prefix */ 7557 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7558 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7559 if (!pcbddc->current_level) { 7560 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7561 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7562 } else { 7563 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7564 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7565 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7566 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7567 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7568 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7569 } 7570 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7571 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7572 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7573 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7574 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7575 /* allow user customization */ 7576 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7577 } 7578 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7579 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7580 if (nisdofs) { 7581 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7582 for (i=0;i<nisdofs;i++) { 7583 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7584 } 7585 } 7586 if (nisneu) { 7587 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7588 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7589 } 7590 if (nisvert) { 7591 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7592 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7593 } 7594 if (coarseG) { 7595 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 7596 } 7597 7598 /* get some info after set from options */ 7599 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7600 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7601 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7602 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 7603 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7604 isbddc = PETSC_FALSE; 7605 } 7606 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7607 if (isredundant) { 7608 KSP inner_ksp; 7609 PC inner_pc; 7610 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7611 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7612 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7613 } 7614 7615 /* parameters which miss an API */ 7616 if (isbddc) { 7617 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7618 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7619 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7620 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7621 if (pcbddc_coarse->benign_saddle_point) { 7622 Mat coarsedivudotp_is; 7623 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7624 IS row,col; 7625 const PetscInt *gidxs; 7626 PetscInt n,st,M,N; 7627 7628 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7629 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7630 st = st-n; 7631 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7632 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7633 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7634 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7635 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7636 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7637 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7638 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7639 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7640 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7641 ierr = ISDestroy(&row);CHKERRQ(ierr); 7642 ierr = ISDestroy(&col);CHKERRQ(ierr); 7643 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7644 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7645 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7646 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7647 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7648 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7649 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7650 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7651 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7652 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7653 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7654 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7655 } 7656 } 7657 7658 /* propagate symmetry info of coarse matrix */ 7659 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7660 if (pc->pmat->symmetric_set) { 7661 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7662 } 7663 if (pc->pmat->hermitian_set) { 7664 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7665 } 7666 if (pc->pmat->spd_set) { 7667 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7668 } 7669 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7670 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7671 } 7672 /* set operators */ 7673 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7674 if (pcbddc->dbg_flag) { 7675 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7676 } 7677 } 7678 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 7679 ierr = PetscFree(isarray);CHKERRQ(ierr); 7680 #if 0 7681 { 7682 PetscViewer viewer; 7683 char filename[256]; 7684 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7685 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7686 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7687 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7688 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7689 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7690 } 7691 #endif 7692 7693 if (pcbddc->coarse_ksp) { 7694 Vec crhs,csol; 7695 7696 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7697 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7698 if (!csol) { 7699 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7700 } 7701 if (!crhs) { 7702 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7703 } 7704 } 7705 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7706 7707 /* compute null space for coarse solver if the benign trick has been requested */ 7708 if (pcbddc->benign_null) { 7709 7710 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7711 for (i=0;i<pcbddc->benign_n;i++) { 7712 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7713 } 7714 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7715 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7716 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7717 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7718 if (coarse_mat) { 7719 Vec nullv; 7720 PetscScalar *array,*array2; 7721 PetscInt nl; 7722 7723 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7724 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7725 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7726 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7727 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7728 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7729 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7730 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7731 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7732 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7733 } 7734 } 7735 7736 if (pcbddc->coarse_ksp) { 7737 PetscBool ispreonly; 7738 7739 if (CoarseNullSpace) { 7740 PetscBool isnull; 7741 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7742 if (isnull) { 7743 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7744 } 7745 /* TODO: add local nullspaces (if any) */ 7746 } 7747 /* setup coarse ksp */ 7748 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7749 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7750 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7751 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7752 KSP check_ksp; 7753 KSPType check_ksp_type; 7754 PC check_pc; 7755 Vec check_vec,coarse_vec; 7756 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7757 PetscInt its; 7758 PetscBool compute_eigs; 7759 PetscReal *eigs_r,*eigs_c; 7760 PetscInt neigs; 7761 const char *prefix; 7762 7763 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7764 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7765 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7766 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7767 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7768 /* prevent from setup unneeded object */ 7769 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7770 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7771 if (ispreonly) { 7772 check_ksp_type = KSPPREONLY; 7773 compute_eigs = PETSC_FALSE; 7774 } else { 7775 check_ksp_type = KSPGMRES; 7776 compute_eigs = PETSC_TRUE; 7777 } 7778 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7779 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7780 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7781 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7782 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7783 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7784 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7785 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7786 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7787 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7788 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7789 /* create random vec */ 7790 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7791 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7792 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7793 /* solve coarse problem */ 7794 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7795 /* set eigenvalue estimation if preonly has not been requested */ 7796 if (compute_eigs) { 7797 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7798 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7799 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7800 if (neigs) { 7801 lambda_max = eigs_r[neigs-1]; 7802 lambda_min = eigs_r[0]; 7803 if (pcbddc->use_coarse_estimates) { 7804 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7805 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7806 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7807 } 7808 } 7809 } 7810 } 7811 7812 /* check coarse problem residual error */ 7813 if (pcbddc->dbg_flag) { 7814 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7815 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7816 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7817 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7818 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7819 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7820 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7821 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7822 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7823 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7824 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7825 if (CoarseNullSpace) { 7826 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7827 } 7828 if (compute_eigs) { 7829 PetscReal lambda_max_s,lambda_min_s; 7830 KSPConvergedReason reason; 7831 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7832 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7833 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 7834 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7835 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7836 for (i=0;i<neigs;i++) { 7837 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7838 } 7839 } 7840 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7841 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7842 } 7843 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7844 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7845 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7846 if (compute_eigs) { 7847 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7848 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7849 } 7850 } 7851 } 7852 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7853 /* print additional info */ 7854 if (pcbddc->dbg_flag) { 7855 /* waits until all processes reaches this point */ 7856 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7857 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7858 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7859 } 7860 7861 /* free memory */ 7862 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7863 PetscFunctionReturn(0); 7864 } 7865 7866 #undef __FUNCT__ 7867 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 7868 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7869 { 7870 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7871 PC_IS* pcis = (PC_IS*)pc->data; 7872 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7873 IS subset,subset_mult,subset_n; 7874 PetscInt local_size,coarse_size=0; 7875 PetscInt *local_primal_indices=NULL; 7876 const PetscInt *t_local_primal_indices; 7877 PetscErrorCode ierr; 7878 7879 PetscFunctionBegin; 7880 /* Compute global number of coarse dofs */ 7881 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7882 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7883 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7884 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7885 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7886 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7887 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7888 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7889 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7890 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7891 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7892 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7893 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7894 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7895 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7896 7897 /* check numbering */ 7898 if (pcbddc->dbg_flag) { 7899 PetscScalar coarsesum,*array,*array2; 7900 PetscInt i; 7901 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7902 7903 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7904 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7905 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7906 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7907 /* counter */ 7908 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7909 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7910 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7911 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7912 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7913 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7914 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7915 for (i=0;i<pcbddc->local_primal_size;i++) { 7916 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7917 } 7918 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7919 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7920 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7921 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7922 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7923 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7924 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7925 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7926 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7927 for (i=0;i<pcis->n;i++) { 7928 if (array[i] != 0.0 && array[i] != array2[i]) { 7929 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 7930 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 7931 set_error = PETSC_TRUE; 7932 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 7933 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 7934 } 7935 } 7936 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7937 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7938 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7939 for (i=0;i<pcis->n;i++) { 7940 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 7941 } 7942 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7943 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7944 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7945 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7946 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 7947 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 7948 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 7949 PetscInt *gidxs; 7950 7951 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 7952 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 7953 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 7954 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7955 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 7956 for (i=0;i<pcbddc->local_primal_size;i++) { 7957 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 7958 } 7959 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7960 ierr = PetscFree(gidxs);CHKERRQ(ierr); 7961 } 7962 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7963 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7964 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 7965 } 7966 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 7967 /* get back data */ 7968 *coarse_size_n = coarse_size; 7969 *local_primal_indices_n = local_primal_indices; 7970 PetscFunctionReturn(0); 7971 } 7972 7973 #undef __FUNCT__ 7974 #define __FUNCT__ "PCBDDCGlobalToLocal" 7975 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 7976 { 7977 IS localis_t; 7978 PetscInt i,lsize,*idxs,n; 7979 PetscScalar *vals; 7980 PetscErrorCode ierr; 7981 7982 PetscFunctionBegin; 7983 /* get indices in local ordering exploiting local to global map */ 7984 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 7985 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 7986 for (i=0;i<lsize;i++) vals[i] = 1.0; 7987 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7988 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 7989 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 7990 if (idxs) { /* multilevel guard */ 7991 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 7992 } 7993 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 7994 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7995 ierr = PetscFree(vals);CHKERRQ(ierr); 7996 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 7997 /* now compute set in local ordering */ 7998 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7999 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8000 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8001 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8002 for (i=0,lsize=0;i<n;i++) { 8003 if (PetscRealPart(vals[i]) > 0.5) { 8004 lsize++; 8005 } 8006 } 8007 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8008 for (i=0,lsize=0;i<n;i++) { 8009 if (PetscRealPart(vals[i]) > 0.5) { 8010 idxs[lsize++] = i; 8011 } 8012 } 8013 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8014 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8015 *localis = localis_t; 8016 PetscFunctionReturn(0); 8017 } 8018 8019 #undef __FUNCT__ 8020 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 8021 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8022 { 8023 PC_IS *pcis=(PC_IS*)pc->data; 8024 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8025 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8026 Mat S_j; 8027 PetscInt *used_xadj,*used_adjncy; 8028 PetscBool free_used_adj; 8029 PetscErrorCode ierr; 8030 8031 PetscFunctionBegin; 8032 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8033 free_used_adj = PETSC_FALSE; 8034 if (pcbddc->sub_schurs_layers == -1) { 8035 used_xadj = NULL; 8036 used_adjncy = NULL; 8037 } else { 8038 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8039 used_xadj = pcbddc->mat_graph->xadj; 8040 used_adjncy = pcbddc->mat_graph->adjncy; 8041 } else if (pcbddc->computed_rowadj) { 8042 used_xadj = pcbddc->mat_graph->xadj; 8043 used_adjncy = pcbddc->mat_graph->adjncy; 8044 } else { 8045 PetscBool flg_row=PETSC_FALSE; 8046 const PetscInt *xadj,*adjncy; 8047 PetscInt nvtxs; 8048 8049 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8050 if (flg_row) { 8051 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8052 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8053 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8054 free_used_adj = PETSC_TRUE; 8055 } else { 8056 pcbddc->sub_schurs_layers = -1; 8057 used_xadj = NULL; 8058 used_adjncy = NULL; 8059 } 8060 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8061 } 8062 } 8063 8064 /* setup sub_schurs data */ 8065 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8066 if (!sub_schurs->schur_explicit) { 8067 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8068 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8069 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8070 } else { 8071 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8072 PetscBool isseqaij,need_change = PETSC_FALSE; 8073 PetscInt benign_n; 8074 Mat change = NULL; 8075 Vec scaling = NULL; 8076 IS change_primal = NULL; 8077 8078 if (!pcbddc->use_vertices && reuse_solvers) { 8079 PetscInt n_vertices; 8080 8081 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8082 reuse_solvers = (PetscBool)!n_vertices; 8083 } 8084 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8085 if (!isseqaij) { 8086 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8087 if (matis->A == pcbddc->local_mat) { 8088 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8089 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8090 } else { 8091 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8092 } 8093 } 8094 if (!pcbddc->benign_change_explicit) { 8095 benign_n = pcbddc->benign_n; 8096 } else { 8097 benign_n = 0; 8098 } 8099 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8100 We need a global reduction to avoid possible deadlocks. 8101 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8102 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8103 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8104 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8105 need_change = (PetscBool)(!need_change); 8106 } 8107 /* If the user defines additional constraints, we import them here. 8108 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8109 if (need_change) { 8110 PC_IS *pcisf; 8111 PC_BDDC *pcbddcf; 8112 PC pcf; 8113 8114 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8115 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8116 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8117 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8118 /* hacks */ 8119 pcisf = (PC_IS*)pcf->data; 8120 pcisf->is_B_local = pcis->is_B_local; 8121 pcisf->vec1_N = pcis->vec1_N; 8122 pcisf->BtoNmap = pcis->BtoNmap; 8123 pcisf->n = pcis->n; 8124 pcisf->n_B = pcis->n_B; 8125 pcbddcf = (PC_BDDC*)pcf->data; 8126 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8127 pcbddcf->mat_graph = pcbddc->mat_graph; 8128 pcbddcf->use_faces = PETSC_TRUE; 8129 pcbddcf->use_change_of_basis = PETSC_TRUE; 8130 pcbddcf->use_change_on_faces = PETSC_TRUE; 8131 pcbddcf->use_qr_single = PETSC_TRUE; 8132 pcbddcf->fake_change = PETSC_TRUE; 8133 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8134 /* store information on primal vertices and change of basis (in local numbering) */ 8135 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8136 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8137 change = pcbddcf->ConstraintMatrix; 8138 pcbddcf->ConstraintMatrix = NULL; 8139 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8140 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8141 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8142 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8143 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8144 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8145 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8146 pcf->ops->destroy = NULL; 8147 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8148 } 8149 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8150 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8151 ierr = MatDestroy(&change);CHKERRQ(ierr); 8152 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8153 } 8154 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8155 8156 /* free adjacency */ 8157 if (free_used_adj) { 8158 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8159 } 8160 PetscFunctionReturn(0); 8161 } 8162 8163 #undef __FUNCT__ 8164 #define __FUNCT__ "PCBDDCInitSubSchurs" 8165 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8166 { 8167 PC_IS *pcis=(PC_IS*)pc->data; 8168 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8169 PCBDDCGraph graph; 8170 PetscErrorCode ierr; 8171 8172 PetscFunctionBegin; 8173 /* attach interface graph for determining subsets */ 8174 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8175 IS verticesIS,verticescomm; 8176 PetscInt vsize,*idxs; 8177 8178 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8179 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8180 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8181 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8182 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8183 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8184 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8185 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8186 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8187 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8188 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8189 } else { 8190 graph = pcbddc->mat_graph; 8191 } 8192 /* print some info */ 8193 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8194 IS vertices; 8195 PetscInt nv,nedges,nfaces; 8196 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8197 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8198 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8199 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8200 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8201 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8202 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8203 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8204 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8205 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8206 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8207 } 8208 8209 /* sub_schurs init */ 8210 if (!pcbddc->sub_schurs) { 8211 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8212 } 8213 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8214 8215 /* free graph struct */ 8216 if (pcbddc->sub_schurs_rebuild) { 8217 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8218 } 8219 PetscFunctionReturn(0); 8220 } 8221 8222 #undef __FUNCT__ 8223 #define __FUNCT__ "PCBDDCCheckOperator" 8224 PetscErrorCode PCBDDCCheckOperator(PC pc) 8225 { 8226 PC_IS *pcis=(PC_IS*)pc->data; 8227 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8228 PetscErrorCode ierr; 8229 8230 PetscFunctionBegin; 8231 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8232 IS zerodiag = NULL; 8233 Mat S_j,B0_B=NULL; 8234 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8235 PetscScalar *p0_check,*array,*array2; 8236 PetscReal norm; 8237 PetscInt i; 8238 8239 /* B0 and B0_B */ 8240 if (zerodiag) { 8241 IS dummy; 8242 8243 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8244 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8245 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8246 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8247 } 8248 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8249 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8250 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8251 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8252 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8253 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8254 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8255 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8256 /* S_j */ 8257 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8258 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8259 8260 /* mimic vector in \widetilde{W}_\Gamma */ 8261 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8262 /* continuous in primal space */ 8263 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8264 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8265 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8266 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8267 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8268 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8269 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8270 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8271 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8272 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8273 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8274 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8275 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8276 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8277 8278 /* assemble rhs for coarse problem */ 8279 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8280 /* local with Schur */ 8281 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8282 if (zerodiag) { 8283 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8284 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8285 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8286 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8287 } 8288 /* sum on primal nodes the local contributions */ 8289 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8290 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8291 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8292 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8293 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8294 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8295 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8296 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8297 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8298 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8299 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8300 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8301 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8302 /* scale primal nodes (BDDC sums contibutions) */ 8303 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8304 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8305 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8306 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8307 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8308 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8309 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8310 /* global: \widetilde{B0}_B w_\Gamma */ 8311 if (zerodiag) { 8312 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8313 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8314 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8315 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8316 } 8317 /* BDDC */ 8318 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8319 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8320 8321 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8322 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8323 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8324 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8325 for (i=0;i<pcbddc->benign_n;i++) { 8326 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8327 } 8328 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8329 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8330 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8331 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8332 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8333 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8334 } 8335 PetscFunctionReturn(0); 8336 } 8337 8338 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8339 #undef __FUNCT__ 8340 #define __FUNCT__ "MatMPIAIJRestrict" 8341 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8342 { 8343 Mat At; 8344 IS rows; 8345 PetscInt rst,ren; 8346 PetscErrorCode ierr; 8347 PetscLayout rmap; 8348 8349 PetscFunctionBegin; 8350 rst = ren = 0; 8351 if (ccomm != MPI_COMM_NULL) { 8352 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8353 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8354 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8355 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8356 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8357 } 8358 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8359 ierr = MatGetSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8360 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8361 8362 if (ccomm != MPI_COMM_NULL) { 8363 Mat_MPIAIJ *a,*b; 8364 IS from,to; 8365 Vec gvec; 8366 PetscInt lsize; 8367 8368 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8369 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8370 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8371 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8372 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8373 a = (Mat_MPIAIJ*)At->data; 8374 b = (Mat_MPIAIJ*)(*B)->data; 8375 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8376 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8377 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8378 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8379 b->A = a->A; 8380 b->B = a->B; 8381 8382 b->donotstash = a->donotstash; 8383 b->roworiented = a->roworiented; 8384 b->rowindices = 0; 8385 b->rowvalues = 0; 8386 b->getrowactive = PETSC_FALSE; 8387 8388 (*B)->rmap = rmap; 8389 (*B)->factortype = A->factortype; 8390 (*B)->assembled = PETSC_TRUE; 8391 (*B)->insertmode = NOT_SET_VALUES; 8392 (*B)->preallocated = PETSC_TRUE; 8393 8394 if (a->colmap) { 8395 #if defined(PETSC_USE_CTABLE) 8396 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8397 #else 8398 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8399 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8400 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8401 #endif 8402 } else b->colmap = 0; 8403 if (a->garray) { 8404 PetscInt len; 8405 len = a->B->cmap->n; 8406 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8407 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8408 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8409 } else b->garray = 0; 8410 8411 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8412 b->lvec = a->lvec; 8413 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8414 8415 /* cannot use VecScatterCopy */ 8416 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8417 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8418 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8419 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8420 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8421 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8422 ierr = ISDestroy(&from);CHKERRQ(ierr); 8423 ierr = ISDestroy(&to);CHKERRQ(ierr); 8424 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8425 } 8426 ierr = MatDestroy(&At);CHKERRQ(ierr); 8427 PetscFunctionReturn(0); 8428 } 8429