1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 8 9 /* if range is true, it returns B s.t. span{B} = range(A) 10 if range is false, it returns B s.t. range(B) _|_ range(A) */ 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatDenseOrthogonalRangeOrComplement" 13 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 14 { 15 #if !defined(PETSC_USE_COMPLEX) 16 PetscScalar *uwork,*data,*U, ds = 0.; 17 PetscReal *sing; 18 PetscBLASInt bM,bN,lwork,lierr,di = 1; 19 PetscInt ulw,i,nr,nc,n; 20 PetscErrorCode ierr; 21 22 PetscFunctionBegin; 23 #if defined(PETSC_MISSING_LAPACK_GESVD) 24 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 25 #else 26 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 27 if (!nr || !nc) PetscFunctionReturn(0); 28 29 /* workspace */ 30 if (!work) { 31 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 32 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 33 } else { 34 ulw = lw; 35 uwork = work; 36 } 37 n = PetscMin(nr,nc); 38 if (!rwork) { 39 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 40 } else { 41 sing = rwork; 42 } 43 44 /* SVD */ 45 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 46 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 47 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 49 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 50 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 51 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 52 ierr = PetscFPTrapPop();CHKERRQ(ierr); 53 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 54 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 55 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 56 if (!rwork) { 57 ierr = PetscFree(sing);CHKERRQ(ierr); 58 } 59 if (!work) { 60 ierr = PetscFree(uwork);CHKERRQ(ierr); 61 } 62 /* create B */ 63 if (!range) { 64 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 65 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 66 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 67 } else { 68 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 69 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 70 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 71 } 72 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 73 ierr = PetscFree(U);CHKERRQ(ierr); 74 #endif 75 #else /* PETSC_USE_COMPLEX */ 76 PetscFunctionBegin; 77 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 78 #endif 79 PetscFunctionReturn(0); 80 } 81 82 /* TODO REMOVE */ 83 #if defined(PRINT_GDET) 84 static int inc = 0; 85 static int lev = 0; 86 #endif 87 88 #undef __FUNCT__ 89 #define __FUNCT__ "PCBDDCComputeNedelecChangeEdge" 90 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 91 { 92 PetscErrorCode ierr; 93 Mat GE,GEd; 94 PetscInt rsize,csize,esize; 95 PetscScalar *ptr; 96 97 PetscFunctionBegin; 98 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 99 if (!esize) PetscFunctionReturn(0); 100 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 101 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 102 103 /* gradients */ 104 ptr = work + 5*esize; 105 ierr = MatGetSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 107 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 108 ierr = MatDestroy(&GE);CHKERRQ(ierr); 109 110 /* constants */ 111 ptr += rsize*csize; 112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 113 ierr = MatGetSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 114 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 115 ierr = MatDestroy(&GE);CHKERRQ(ierr); 116 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 117 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 118 119 if (corners) { 120 Mat GEc; 121 PetscScalar *vals,v; 122 123 ierr = MatGetSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 124 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 125 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 126 /* v = PetscAbsScalar(vals[0]) */; 127 v = 1.; 128 cvals[0] = vals[0]/v; 129 cvals[1] = vals[1]/v; 130 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 131 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 132 #if defined(PRINT_GDET) 133 { 134 PetscViewer viewer; 135 char filename[256]; 136 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 137 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 138 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 139 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 140 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 141 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 142 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 143 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 144 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 145 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 146 } 147 #endif 148 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 149 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 150 } 151 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "PCBDDCNedelecSupport" 157 PetscErrorCode PCBDDCNedelecSupport(PC pc) 158 { 159 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 160 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 161 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 162 Vec tvec; 163 PetscSF sfv; 164 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 165 MPI_Comm comm; 166 IS lned,primals,allprimals,nedfieldlocal; 167 IS *eedges,*extrows,*extcols,*alleedges; 168 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 169 PetscScalar *vals,*work; 170 PetscReal *rwork; 171 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 172 PetscInt ne,nv,Lv,order,n,field; 173 PetscInt n_neigh,*neigh,*n_shared,**shared; 174 PetscInt i,j,extmem,cum,maxsize,nee; 175 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 176 PetscInt *sfvleaves,*sfvroots; 177 PetscInt *corners,*cedges; 178 PetscInt *ecount,**eneighs,*vcount,**vneighs; 179 #if defined(PETSC_USE_DEBUG) 180 PetscInt *emarks; 181 #endif 182 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 183 PetscErrorCode ierr; 184 185 PetscFunctionBegin; 186 /* If the discrete gradient is defined for a subset of dofs and global is true, 187 it assumes G is given in global ordering for all the dofs. 188 Otherwise, the ordering is global for the Nedelec field */ 189 order = pcbddc->nedorder; 190 conforming = pcbddc->conforming; 191 field = pcbddc->nedfield; 192 global = pcbddc->nedglobal; 193 setprimal = PETSC_FALSE; 194 print = PETSC_FALSE; 195 singular = PETSC_FALSE; 196 197 /* Command line customization */ 198 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 199 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 200 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 201 ierr = PetscOptionsInt ("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 202 /* print debug info TODO: to be removed */ 203 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 204 ierr = PetscOptionsEnd();CHKERRQ(ierr); 205 206 /* Return if there are no edges in the decomposition and the problem is not singular */ 207 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 208 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 209 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 210 if (!singular) { 211 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 212 lrc[0] = PETSC_FALSE; 213 for (i=0;i<n;i++) { 214 if (PetscRealPart(vals[i]) > 2.) { 215 lrc[0] = PETSC_TRUE; 216 break; 217 } 218 } 219 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 220 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 221 if (!lrc[1]) PetscFunctionReturn(0); 222 } 223 224 /* Get Nedelec field */ 225 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 226 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 227 if (pcbddc->n_ISForDofsLocal && field >= 0) { 228 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 229 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 230 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 231 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 232 ne = n; 233 nedfieldlocal = NULL; 234 global = PETSC_TRUE; 235 } else if (field == PETSC_DECIDE) { 236 PetscInt rst,ren,*idx; 237 238 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 239 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 240 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 241 for (i=rst;i<ren;i++) { 242 PetscInt nc; 243 244 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 245 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 246 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 247 } 248 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 249 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 250 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 251 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 252 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 253 } else { 254 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 255 } 256 257 /* Sanity checks */ 258 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 259 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 260 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 261 262 /* Just set primal dofs and return */ 263 if (setprimal) { 264 IS enedfieldlocal; 265 PetscInt *eidxs; 266 267 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 268 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 269 if (nedfieldlocal) { 270 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 271 for (i=0,cum=0;i<ne;i++) { 272 if (PetscRealPart(vals[idxs[i]]) > 2.) { 273 eidxs[cum++] = idxs[i]; 274 } 275 } 276 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 277 } else { 278 for (i=0,cum=0;i<ne;i++) { 279 if (PetscRealPart(vals[i]) > 2.) { 280 eidxs[cum++] = i; 281 } 282 } 283 } 284 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 285 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 286 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 287 ierr = PetscFree(eidxs);CHKERRQ(ierr); 288 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 289 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 290 PetscFunctionReturn(0); 291 } 292 293 /* Compute some l2g maps */ 294 if (nedfieldlocal) { 295 IS is; 296 297 /* need to map from the local Nedelec field to local numbering */ 298 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 299 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 300 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 301 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 302 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 303 if (global) { 304 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 305 el2g = al2g; 306 } else { 307 IS gis; 308 309 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 310 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 311 ierr = ISDestroy(&gis);CHKERRQ(ierr); 312 } 313 ierr = ISDestroy(&is);CHKERRQ(ierr); 314 } else { 315 /* restore default */ 316 pcbddc->nedfield = -1; 317 /* one ref for the destruction of al2g, one for el2g */ 318 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 319 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 320 el2g = al2g; 321 fl2g = NULL; 322 } 323 324 /* Start communication to drop connections for interior edges (for cc analysis only) */ 325 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 326 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 327 if (nedfieldlocal) { 328 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 329 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 330 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 331 } else { 332 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 333 } 334 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 335 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 336 337 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 338 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 339 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 340 if (global) { 341 PetscInt rst; 342 343 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 344 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 345 if (matis->sf_rootdata[i] < 2) { 346 matis->sf_rootdata[cum++] = i + rst; 347 } 348 } 349 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 350 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 351 } else { 352 PetscInt *tbz; 353 354 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 355 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 356 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 357 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 358 for (i=0,cum=0;i<ne;i++) 359 if (matis->sf_leafdata[idxs[i]] == 1) 360 tbz[cum++] = i; 361 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 362 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 363 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 364 ierr = PetscFree(tbz);CHKERRQ(ierr); 365 } 366 } else { /* we need the entire G to infer the nullspace */ 367 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 368 G = pcbddc->discretegradient; 369 } 370 371 /* Extract subdomain relevant rows of G */ 372 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 373 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 374 ierr = MatGetSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 375 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 376 ierr = ISDestroy(&lned);CHKERRQ(ierr); 377 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 378 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 379 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 380 381 /* SF for nodal dofs communications */ 382 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 383 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 384 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 385 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 386 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 387 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 388 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 389 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 390 i = singular ? 2 : 1; 391 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 392 393 /* Destroy temporary G created in MATIS format and modified G */ 394 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 395 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 396 ierr = MatDestroy(&G);CHKERRQ(ierr); 397 398 if (print) { 399 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 400 ierr = MatView(lG,NULL);CHKERRQ(ierr); 401 } 402 403 /* Save lG for values insertion in change of basis */ 404 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 405 406 /* Analyze the edge-nodes connections (duplicate lG) */ 407 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 408 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 409 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 410 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 411 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 412 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 413 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 414 /* need to import the boundary specification to ensure the 415 proper detection of coarse edges' endpoints */ 416 if (pcbddc->DirichletBoundariesLocal) { 417 IS is; 418 419 if (fl2g) { 420 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 421 } else { 422 is = pcbddc->DirichletBoundariesLocal; 423 } 424 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 425 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 426 for (i=0;i<cum;i++) { 427 if (idxs[i] >= 0) { 428 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 429 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 430 } 431 } 432 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 433 if (fl2g) { 434 ierr = ISDestroy(&is);CHKERRQ(ierr); 435 } 436 } 437 if (pcbddc->NeumannBoundariesLocal) { 438 IS is; 439 440 if (fl2g) { 441 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 442 } else { 443 is = pcbddc->NeumannBoundariesLocal; 444 } 445 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 446 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 447 for (i=0;i<cum;i++) { 448 if (idxs[i] >= 0) { 449 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 450 } 451 } 452 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 453 if (fl2g) { 454 ierr = ISDestroy(&is);CHKERRQ(ierr); 455 } 456 } 457 458 /* Count neighs per dof */ 459 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 460 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 461 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 462 for (i=1,cum=0;i<n_neigh;i++) { 463 cum += n_shared[i]; 464 for (j=0;j<n_shared[i];j++) { 465 ecount[shared[i][j]]++; 466 } 467 } 468 if (ne) { 469 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 470 } 471 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 472 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 473 for (i=1;i<n_neigh;i++) { 474 for (j=0;j<n_shared[i];j++) { 475 PetscInt k = shared[i][j]; 476 eneighs[k][ecount[k]] = neigh[i]; 477 ecount[k]++; 478 } 479 } 480 for (i=0;i<ne;i++) { 481 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 482 } 483 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 484 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 485 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 486 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 487 for (i=1,cum=0;i<n_neigh;i++) { 488 cum += n_shared[i]; 489 for (j=0;j<n_shared[i];j++) { 490 vcount[shared[i][j]]++; 491 } 492 } 493 if (nv) { 494 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 495 } 496 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 497 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 498 for (i=1;i<n_neigh;i++) { 499 for (j=0;j<n_shared[i];j++) { 500 PetscInt k = shared[i][j]; 501 vneighs[k][vcount[k]] = neigh[i]; 502 vcount[k]++; 503 } 504 } 505 for (i=0;i<nv;i++) { 506 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 507 } 508 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 509 510 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 511 for proper detection of coarse edges' endpoints */ 512 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 513 for (i=0;i<ne;i++) { 514 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 515 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 516 } 517 } 518 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 519 if (!conforming) { 520 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 521 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 522 } 523 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 524 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 525 cum = 0; 526 for (i=0;i<ne;i++) { 527 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 528 if (!PetscBTLookup(btee,i)) { 529 marks[cum++] = i; 530 continue; 531 } 532 /* set badly connected edge dofs as primal */ 533 if (!conforming) { 534 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 535 marks[cum++] = i; 536 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 537 for (j=ii[i];j<ii[i+1];j++) { 538 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 539 } 540 } else { 541 /* every edge dofs should be connected trough a certain number of nodal dofs 542 to other edge dofs belonging to coarse edges 543 - at most 2 endpoints 544 - order-1 interior nodal dofs 545 - no undefined nodal dofs (nconn < order) 546 */ 547 PetscInt ends = 0,ints = 0, undef = 0; 548 for (j=ii[i];j<ii[i+1];j++) { 549 PetscInt v = jj[j],k; 550 PetscInt nconn = iit[v+1]-iit[v]; 551 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 552 if (nconn > order) ends++; 553 else if (nconn == order) ints++; 554 else undef++; 555 } 556 if (undef || ends > 2 || ints != order -1) { 557 marks[cum++] = i; 558 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 559 for (j=ii[i];j<ii[i+1];j++) { 560 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 561 } 562 } 563 } 564 } 565 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 566 if (!order && ii[i+1] != ii[i]) { 567 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 568 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 569 } 570 } 571 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 572 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 573 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 574 if (!conforming) { 575 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 576 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 577 } 578 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 579 580 /* identify splitpoints and corner candidates */ 581 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 582 if (print) { 583 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 584 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 585 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 586 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 587 } 588 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 589 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 590 for (i=0;i<nv;i++) { 591 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 592 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 593 if (!order) { /* variable order */ 594 PetscReal vorder = 0.; 595 596 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 597 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 598 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 599 ord = 1; 600 } 601 #if defined(PETSC_USE_DEBUG) 602 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 603 #endif 604 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 605 if (PetscBTLookup(btbd,jj[j])) { 606 bdir = PETSC_TRUE; 607 break; 608 } 609 if (vc != ecount[jj[j]]) { 610 sneighs = PETSC_FALSE; 611 } else { 612 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 613 for (k=0;k<vc;k++) { 614 if (vn[k] != en[k]) { 615 sneighs = PETSC_FALSE; 616 break; 617 } 618 } 619 } 620 } 621 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 622 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 623 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 624 } else if (test == ord) { 625 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 626 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 627 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 628 } else { 629 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 630 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 631 } 632 } 633 } 634 ierr = PetscFree(ecount);CHKERRQ(ierr); 635 ierr = PetscFree(vcount);CHKERRQ(ierr); 636 if (ne) { 637 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 638 } 639 if (nv) { 640 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 641 } 642 ierr = PetscFree(eneighs);CHKERRQ(ierr); 643 ierr = PetscFree(vneighs);CHKERRQ(ierr); 644 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 645 646 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 647 if (order != 1) { 648 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 649 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 650 for (i=0;i<nv;i++) { 651 if (PetscBTLookup(btvcand,i)) { 652 PetscBool found = PETSC_FALSE; 653 for (j=ii[i];j<ii[i+1] && !found;j++) { 654 PetscInt k,e = jj[j]; 655 if (PetscBTLookup(bte,e)) continue; 656 for (k=iit[e];k<iit[e+1];k++) { 657 PetscInt v = jjt[k]; 658 if (v != i && PetscBTLookup(btvcand,v)) { 659 found = PETSC_TRUE; 660 break; 661 } 662 } 663 } 664 if (!found) { 665 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 666 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 667 } else { 668 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 669 } 670 } 671 } 672 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 673 } 674 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 675 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 676 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 677 678 /* Get the local G^T explicitly */ 679 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 680 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 681 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 682 683 /* Mark interior nodal dofs */ 684 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 685 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 686 for (i=1;i<n_neigh;i++) { 687 for (j=0;j<n_shared[i];j++) { 688 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 689 } 690 } 691 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 692 693 /* communicate corners and splitpoints */ 694 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 695 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 696 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 697 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 698 699 if (print) { 700 IS tbz; 701 702 cum = 0; 703 for (i=0;i<nv;i++) 704 if (sfvleaves[i]) 705 vmarks[cum++] = i; 706 707 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 708 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 709 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 710 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 711 } 712 713 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 714 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 715 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 716 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 717 718 /* Zero rows of lGt corresponding to identified corners 719 and interior nodal dofs */ 720 cum = 0; 721 for (i=0;i<nv;i++) { 722 if (sfvleaves[i]) { 723 vmarks[cum++] = i; 724 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 725 } 726 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 727 } 728 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 729 if (print) { 730 IS tbz; 731 732 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 733 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 734 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 735 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 736 } 737 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 738 ierr = PetscFree(vmarks);CHKERRQ(ierr); 739 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 740 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 741 742 /* Recompute G */ 743 ierr = MatDestroy(&lG);CHKERRQ(ierr); 744 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 745 if (print) { 746 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 747 ierr = MatView(lG,NULL);CHKERRQ(ierr); 748 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 749 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 750 } 751 752 /* Get primal dofs (if any) */ 753 cum = 0; 754 for (i=0;i<ne;i++) { 755 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 756 } 757 if (fl2g) { 758 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 759 } 760 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 761 if (print) { 762 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 763 ierr = ISView(primals,NULL);CHKERRQ(ierr); 764 } 765 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 766 /* TODO: what if the user passed in some of them ? */ 767 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 768 ierr = ISDestroy(&primals);CHKERRQ(ierr); 769 770 /* Compute edge connectivity */ 771 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 772 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 773 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 774 if (fl2g) { 775 PetscBT btf; 776 PetscInt *iia,*jja,*iiu,*jju; 777 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 778 779 /* create CSR for all local dofs */ 780 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 781 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 782 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 783 iiu = pcbddc->mat_graph->xadj; 784 jju = pcbddc->mat_graph->adjncy; 785 } else if (pcbddc->use_local_adj) { 786 rest = PETSC_TRUE; 787 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 788 } else { 789 free = PETSC_TRUE; 790 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 791 iiu[0] = 0; 792 for (i=0;i<n;i++) { 793 iiu[i+1] = i+1; 794 jju[i] = -1; 795 } 796 } 797 798 /* import sizes of CSR */ 799 iia[0] = 0; 800 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 801 802 /* overwrite entries corresponding to the Nedelec field */ 803 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 804 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 805 for (i=0;i<ne;i++) { 806 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 807 iia[idxs[i]+1] = ii[i+1]-ii[i]; 808 } 809 810 /* iia in CSR */ 811 for (i=0;i<n;i++) iia[i+1] += iia[i]; 812 813 /* jja in CSR */ 814 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 815 for (i=0;i<n;i++) 816 if (!PetscBTLookup(btf,i)) 817 for (j=0;j<iiu[i+1]-iiu[i];j++) 818 jja[iia[i]+j] = jju[iiu[i]+j]; 819 820 /* map edge dofs connectivity */ 821 if (jj) { 822 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 823 for (i=0;i<ne;i++) { 824 PetscInt e = idxs[i]; 825 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 826 } 827 } 828 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 829 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 830 if (rest) { 831 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 832 } 833 if (free) { 834 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 835 } 836 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 837 } else { 838 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 839 } 840 841 /* Analyze interface for edge dofs */ 842 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 843 pcbddc->mat_graph->twodim = PETSC_FALSE; 844 845 /* Get coarse edges in the edge space */ 846 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 847 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 848 849 if (fl2g) { 850 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 851 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 852 for (i=0;i<nee;i++) { 853 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 854 } 855 } else { 856 eedges = alleedges; 857 primals = allprimals; 858 } 859 860 /* Mark fine edge dofs with their coarse edge id */ 861 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 862 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 863 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 864 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 865 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 866 if (print) { 867 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 868 ierr = ISView(primals,NULL);CHKERRQ(ierr); 869 } 870 871 maxsize = 0; 872 for (i=0;i<nee;i++) { 873 PetscInt size,mark = i+1; 874 875 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 876 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 877 for (j=0;j<size;j++) marks[idxs[j]] = mark; 878 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 879 maxsize = PetscMax(maxsize,size); 880 } 881 882 /* Find coarse edge endpoints */ 883 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 884 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 885 for (i=0;i<nee;i++) { 886 PetscInt mark = i+1,size; 887 888 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 889 if (!size && nedfieldlocal) continue; 890 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 891 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 892 if (print) { 893 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 894 ISView(eedges[i],NULL); 895 } 896 for (j=0;j<size;j++) { 897 PetscInt k, ee = idxs[j]; 898 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 899 for (k=ii[ee];k<ii[ee+1];k++) { 900 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 901 if (PetscBTLookup(btv,jj[k])) { 902 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 903 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 904 PetscInt k2; 905 PetscBool corner = PETSC_FALSE; 906 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 907 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 908 /* it's a corner if either is connected with an edge dof belonging to a different cc or 909 if the edge dof lie on the natural part of the boundary */ 910 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 911 corner = PETSC_TRUE; 912 break; 913 } 914 } 915 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 916 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 917 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 918 } else { 919 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 920 } 921 } 922 } 923 } 924 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 925 } 926 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 927 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 928 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 929 930 /* Reset marked primal dofs */ 931 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 932 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 933 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 934 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 935 936 /* Now use the initial lG */ 937 ierr = MatDestroy(&lG);CHKERRQ(ierr); 938 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 939 lG = lGinit; 940 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 941 942 /* Compute extended cols indices */ 943 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 944 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 945 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 946 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 947 i *= maxsize; 948 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 949 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 950 eerr = PETSC_FALSE; 951 for (i=0;i<nee;i++) { 952 PetscInt size,found = 0; 953 954 cum = 0; 955 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 956 if (!size && nedfieldlocal) continue; 957 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 958 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 959 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 960 for (j=0;j<size;j++) { 961 PetscInt k,ee = idxs[j]; 962 for (k=ii[ee];k<ii[ee+1];k++) { 963 PetscInt vv = jj[k]; 964 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 965 else if (!PetscBTLookupSet(btvc,vv)) found++; 966 } 967 } 968 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 969 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 970 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 971 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 972 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 973 /* it may happen that endpoints are not defined at this point 974 if it is the case, mark this edge for a second pass */ 975 if (cum != size -1 || found != 2) { 976 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 977 if (print) { 978 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 979 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 980 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 981 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 982 } 983 eerr = PETSC_TRUE; 984 } 985 } 986 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 987 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 988 if (done) { 989 PetscInt *newprimals; 990 991 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 992 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 993 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 994 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 995 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 996 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 997 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 998 for (i=0;i<nee;i++) { 999 PetscBool has_candidates = PETSC_FALSE; 1000 if (PetscBTLookup(bter,i)) { 1001 PetscInt size,mark = i+1; 1002 1003 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1004 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1005 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1006 for (j=0;j<size;j++) { 1007 PetscInt k,ee = idxs[j]; 1008 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 1009 for (k=ii[ee];k<ii[ee+1];k++) { 1010 /* set all candidates located on the edge as corners */ 1011 if (PetscBTLookup(btvcand,jj[k])) { 1012 PetscInt k2,vv = jj[k]; 1013 has_candidates = PETSC_TRUE; 1014 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 1015 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 1016 /* set all edge dofs connected to candidate as primals */ 1017 for (k2=iit[vv];k2<iit[vv+1];k2++) { 1018 if (marks[jjt[k2]] == mark) { 1019 PetscInt k3,ee2 = jjt[k2]; 1020 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 1021 newprimals[cum++] = ee2; 1022 /* finally set the new corners */ 1023 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 1024 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1025 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1026 } 1027 } 1028 } 1029 } else { 1030 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1031 } 1032 } 1033 } 1034 if (!has_candidates) { /* circular edge */ 1035 PetscInt k, ee = idxs[0],*tmarks; 1036 1037 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1038 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1039 for (k=ii[ee];k<ii[ee+1];k++) { 1040 PetscInt k2; 1041 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1042 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1043 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1044 } 1045 for (j=0;j<size;j++) { 1046 if (tmarks[idxs[j]] > 1) { 1047 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1048 newprimals[cum++] = idxs[j]; 1049 } 1050 } 1051 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1052 } 1053 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1054 } 1055 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1056 } 1057 ierr = PetscFree(extcols);CHKERRQ(ierr); 1058 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1059 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1060 if (fl2g) { 1061 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1062 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1063 for (i=0;i<nee;i++) { 1064 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1065 } 1066 ierr = PetscFree(eedges);CHKERRQ(ierr); 1067 } 1068 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1069 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1070 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1071 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1072 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1073 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1074 pcbddc->mat_graph->twodim = PETSC_FALSE; 1075 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1076 if (fl2g) { 1077 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1078 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1079 for (i=0;i<nee;i++) { 1080 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1081 } 1082 } else { 1083 eedges = alleedges; 1084 primals = allprimals; 1085 } 1086 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1087 1088 /* Mark again */ 1089 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1090 for (i=0;i<nee;i++) { 1091 PetscInt size,mark = i+1; 1092 1093 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1094 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1095 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1096 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1097 } 1098 if (print) { 1099 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1100 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1101 } 1102 1103 /* Recompute extended cols */ 1104 eerr = PETSC_FALSE; 1105 for (i=0;i<nee;i++) { 1106 PetscInt size; 1107 1108 cum = 0; 1109 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1110 if (!size && nedfieldlocal) continue; 1111 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1112 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1113 for (j=0;j<size;j++) { 1114 PetscInt k,ee = idxs[j]; 1115 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1116 } 1117 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1118 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1119 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1120 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1121 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1122 if (cum != size -1) { 1123 if (print) { 1124 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1125 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1126 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1127 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1128 } 1129 eerr = PETSC_TRUE; 1130 } 1131 } 1132 } 1133 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1134 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1135 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1136 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1137 /* an error should not occur at this point */ 1138 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1139 1140 /* Check the number of endpoints */ 1141 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1142 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1143 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1144 for (i=0;i<nee;i++) { 1145 PetscInt size, found = 0, gc[2]; 1146 1147 /* init with defaults */ 1148 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1149 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1150 if (!size && nedfieldlocal) continue; 1151 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1152 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1153 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1154 for (j=0;j<size;j++) { 1155 PetscInt k,ee = idxs[j]; 1156 for (k=ii[ee];k<ii[ee+1];k++) { 1157 PetscInt vv = jj[k]; 1158 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1159 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1160 corners[i*2+found++] = vv; 1161 } 1162 } 1163 } 1164 if (found != 2) { 1165 PetscInt e; 1166 if (fl2g) { 1167 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1168 } else { 1169 e = idxs[0]; 1170 } 1171 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1172 } 1173 1174 /* get primal dof index on this coarse edge */ 1175 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1176 if (gc[0] > gc[1]) { 1177 PetscInt swap = corners[2*i]; 1178 corners[2*i] = corners[2*i+1]; 1179 corners[2*i+1] = swap; 1180 } 1181 cedges[i] = idxs[size-1]; 1182 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1183 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1184 } 1185 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1186 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1187 1188 #if defined(PETSC_USE_DEBUG) 1189 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1190 not interfere with neighbouring coarse edges */ 1191 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1192 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1193 for (i=0;i<nv;i++) { 1194 PetscInt emax = 0,eemax = 0; 1195 1196 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1197 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1198 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1199 for (j=1;j<nee+1;j++) { 1200 if (emax < emarks[j]) { 1201 emax = emarks[j]; 1202 eemax = j; 1203 } 1204 } 1205 /* not relevant for edges */ 1206 if (!eemax) continue; 1207 1208 for (j=ii[i];j<ii[i+1];j++) { 1209 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1210 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1211 } 1212 } 1213 } 1214 ierr = PetscFree(emarks);CHKERRQ(ierr); 1215 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1216 #endif 1217 1218 /* Compute extended rows indices for edge blocks of the change of basis */ 1219 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1220 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1221 extmem *= maxsize; 1222 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1223 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1224 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1225 for (i=0;i<nv;i++) { 1226 PetscInt mark = 0,size,start; 1227 1228 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1229 for (j=ii[i];j<ii[i+1];j++) 1230 if (marks[jj[j]] && !mark) 1231 mark = marks[jj[j]]; 1232 1233 /* not relevant */ 1234 if (!mark) continue; 1235 1236 /* import extended row */ 1237 mark--; 1238 start = mark*extmem+extrowcum[mark]; 1239 size = ii[i+1]-ii[i]; 1240 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1241 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1242 extrowcum[mark] += size; 1243 } 1244 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1245 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1246 ierr = PetscFree(marks);CHKERRQ(ierr); 1247 1248 /* Compress extrows */ 1249 cum = 0; 1250 for (i=0;i<nee;i++) { 1251 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1252 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1253 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1254 cum = PetscMax(cum,size); 1255 } 1256 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1257 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1258 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1259 1260 /* Workspace for lapack inner calls and VecSetValues */ 1261 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1262 1263 /* Create change of basis matrix (preallocation can be improved) */ 1264 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1265 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1266 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1267 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1268 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1269 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1270 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1271 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1272 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1273 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1274 1275 /* Defaults to identity */ 1276 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1277 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1278 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1279 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1280 1281 /* Create discrete gradient for the coarser level if needed */ 1282 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1283 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1284 if (pcbddc->current_level < pcbddc->max_levels) { 1285 ISLocalToGlobalMapping cel2g,cvl2g; 1286 IS wis,gwis; 1287 PetscInt cnv,cne; 1288 1289 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1290 if (fl2g) { 1291 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1292 } else { 1293 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1294 pcbddc->nedclocal = wis; 1295 } 1296 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1297 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1298 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1299 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1300 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1301 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1302 1303 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1304 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1305 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1306 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1307 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1308 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1309 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1310 1311 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1312 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1313 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1314 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1315 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1316 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1317 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1318 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1319 } 1320 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1321 1322 #if defined(PRINT_GDET) 1323 inc = 0; 1324 lev = pcbddc->current_level; 1325 #endif 1326 1327 /* Insert values in the change of basis matrix */ 1328 for (i=0;i<nee;i++) { 1329 Mat Gins = NULL, GKins = NULL; 1330 IS cornersis = NULL; 1331 PetscScalar cvals[2]; 1332 1333 if (pcbddc->nedcG) { 1334 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1335 } 1336 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1337 if (Gins && GKins) { 1338 PetscScalar *data; 1339 const PetscInt *rows,*cols; 1340 PetscInt nrh,nch,nrc,ncc; 1341 1342 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1343 /* H1 */ 1344 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1345 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1346 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1347 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1348 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1349 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1350 /* complement */ 1351 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1352 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1353 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i); 1354 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc); 1355 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1356 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1357 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1358 1359 /* coarse discrete gradient */ 1360 if (pcbddc->nedcG) { 1361 PetscInt cols[2]; 1362 1363 cols[0] = 2*i; 1364 cols[1] = 2*i+1; 1365 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1366 } 1367 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1368 } 1369 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1370 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1371 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1372 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1373 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1374 } 1375 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1376 1377 /* Start assembling */ 1378 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1379 if (pcbddc->nedcG) { 1380 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1381 } 1382 1383 /* Free */ 1384 if (fl2g) { 1385 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1386 for (i=0;i<nee;i++) { 1387 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1388 } 1389 ierr = PetscFree(eedges);CHKERRQ(ierr); 1390 } 1391 1392 /* hack mat_graph with primal dofs on the coarse edges */ 1393 { 1394 PCBDDCGraph graph = pcbddc->mat_graph; 1395 PetscInt *oqueue = graph->queue; 1396 PetscInt *ocptr = graph->cptr; 1397 PetscInt ncc,*idxs; 1398 1399 /* find first primal edge */ 1400 if (pcbddc->nedclocal) { 1401 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1402 } else { 1403 if (fl2g) { 1404 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1405 } 1406 idxs = cedges; 1407 } 1408 cum = 0; 1409 while (cum < nee && cedges[cum] < 0) cum++; 1410 1411 /* adapt connected components */ 1412 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1413 graph->cptr[0] = 0; 1414 for (i=0,ncc=0;i<graph->ncc;i++) { 1415 PetscInt lc = ocptr[i+1]-ocptr[i]; 1416 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1417 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1418 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1419 ncc++; 1420 lc--; 1421 cum++; 1422 while (cum < nee && cedges[cum] < 0) cum++; 1423 } 1424 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1425 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1426 ncc++; 1427 } 1428 graph->ncc = ncc; 1429 if (pcbddc->nedclocal) { 1430 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1431 } 1432 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1433 } 1434 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1435 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1436 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1437 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1438 1439 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1440 ierr = PetscFree(extrow);CHKERRQ(ierr); 1441 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1442 ierr = PetscFree(corners);CHKERRQ(ierr); 1443 ierr = PetscFree(cedges);CHKERRQ(ierr); 1444 ierr = PetscFree(extrows);CHKERRQ(ierr); 1445 ierr = PetscFree(extcols);CHKERRQ(ierr); 1446 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1447 1448 /* Complete assembling */ 1449 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1450 if (pcbddc->nedcG) { 1451 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1452 #if 0 1453 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1454 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1455 #endif 1456 } 1457 1458 /* set change of basis */ 1459 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1460 ierr = MatDestroy(&T);CHKERRQ(ierr); 1461 1462 PetscFunctionReturn(0); 1463 } 1464 1465 /* the near-null space of BDDC carries information on quadrature weights, 1466 and these can be collinear -> so cheat with MatNullSpaceCreate 1467 and create a suitable set of basis vectors first */ 1468 #undef __FUNCT__ 1469 #define __FUNCT__ "PCBDDCNullSpaceCreate" 1470 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1471 { 1472 PetscErrorCode ierr; 1473 PetscInt i; 1474 1475 PetscFunctionBegin; 1476 for (i=0;i<nvecs;i++) { 1477 PetscInt first,last; 1478 1479 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1480 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1481 if (i>=first && i < last) { 1482 PetscScalar *data; 1483 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1484 if (!has_const) { 1485 data[i-first] = 1.; 1486 } else { 1487 data[2*i-first] = 1./PetscSqrtReal(2.); 1488 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1489 } 1490 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1491 } 1492 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1493 } 1494 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1495 for (i=0;i<nvecs;i++) { /* reset vectors */ 1496 PetscInt first,last; 1497 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1498 if (i>=first && i < last) { 1499 PetscScalar *data; 1500 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1501 if (!has_const) { 1502 data[i-first] = 0.; 1503 } else { 1504 data[2*i-first] = 0.; 1505 data[2*i-first+1] = 0.; 1506 } 1507 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1508 } 1509 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1510 } 1511 PetscFunctionReturn(0); 1512 } 1513 1514 #undef __FUNCT__ 1515 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 1516 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1517 { 1518 Mat loc_divudotp; 1519 Vec p,v,vins,quad_vec,*quad_vecs; 1520 ISLocalToGlobalMapping map; 1521 IS *faces,*edges; 1522 PetscScalar *vals; 1523 const PetscScalar *array; 1524 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1525 PetscMPIInt rank; 1526 PetscErrorCode ierr; 1527 1528 PetscFunctionBegin; 1529 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1530 if (graph->twodim) { 1531 lmaxneighs = 2; 1532 } else { 1533 lmaxneighs = 1; 1534 for (i=0;i<ne;i++) { 1535 const PetscInt *idxs; 1536 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1537 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1538 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1539 } 1540 lmaxneighs++; /* graph count does not include self */ 1541 } 1542 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1543 maxsize = 0; 1544 for (i=0;i<ne;i++) { 1545 PetscInt nn; 1546 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1547 maxsize = PetscMax(maxsize,nn); 1548 } 1549 for (i=0;i<nf;i++) { 1550 PetscInt nn; 1551 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1552 maxsize = PetscMax(maxsize,nn); 1553 } 1554 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1555 /* create vectors to hold quadrature weights */ 1556 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1557 if (!transpose) { 1558 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1559 } else { 1560 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1561 } 1562 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1563 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1564 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1565 for (i=0;i<maxneighs;i++) { 1566 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1567 } 1568 1569 /* compute local quad vec */ 1570 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1571 if (!transpose) { 1572 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1573 } else { 1574 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1575 } 1576 ierr = VecSet(p,1.);CHKERRQ(ierr); 1577 if (!transpose) { 1578 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1579 } else { 1580 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1581 } 1582 if (vl2l) { 1583 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1584 } else { 1585 vins = v; 1586 } 1587 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1588 ierr = VecDestroy(&p);CHKERRQ(ierr); 1589 1590 /* insert in global quadrature vecs */ 1591 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1592 for (i=0;i<nf;i++) { 1593 const PetscInt *idxs; 1594 PetscInt idx,nn,j; 1595 1596 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1597 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1598 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1599 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1600 idx = -(idx+1); 1601 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1602 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1603 } 1604 for (i=0;i<ne;i++) { 1605 const PetscInt *idxs; 1606 PetscInt idx,nn,j; 1607 1608 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1609 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1610 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1611 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1612 idx = -(idx+1); 1613 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1614 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1615 } 1616 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1617 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1618 if (vl2l) { 1619 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1620 } 1621 ierr = VecDestroy(&v);CHKERRQ(ierr); 1622 ierr = PetscFree(vals);CHKERRQ(ierr); 1623 1624 /* assemble near null space */ 1625 for (i=0;i<maxneighs;i++) { 1626 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1627 } 1628 for (i=0;i<maxneighs;i++) { 1629 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1630 } 1631 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1632 PetscFunctionReturn(0); 1633 } 1634 1635 1636 #undef __FUNCT__ 1637 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 1638 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1639 { 1640 PetscErrorCode ierr; 1641 Vec local,global; 1642 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1643 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1644 1645 PetscFunctionBegin; 1646 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1647 /* need to convert from global to local topology information and remove references to information in global ordering */ 1648 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1649 if (pcbddc->user_provided_isfordofs) { 1650 if (pcbddc->n_ISForDofs) { 1651 PetscInt i; 1652 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1653 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1654 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1655 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1656 } 1657 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1658 pcbddc->n_ISForDofs = 0; 1659 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1660 } 1661 } else { 1662 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1663 PetscInt i, n = matis->A->rmap->n; 1664 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1665 if (i > 1) { 1666 pcbddc->n_ISForDofsLocal = i; 1667 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1668 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1669 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1670 } 1671 } 1672 } else { 1673 PetscInt i; 1674 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1675 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1676 } 1677 } 1678 } 1679 1680 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1681 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1682 } else if (pcbddc->DirichletBoundariesLocal) { 1683 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1684 } 1685 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1686 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1687 } else if (pcbddc->NeumannBoundariesLocal) { 1688 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1689 } 1690 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1691 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1692 } 1693 ierr = VecDestroy(&global);CHKERRQ(ierr); 1694 ierr = VecDestroy(&local);CHKERRQ(ierr); 1695 1696 PetscFunctionReturn(0); 1697 } 1698 1699 #undef __FUNCT__ 1700 #define __FUNCT__ "PCBDDCConsistencyCheckIS" 1701 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1702 { 1703 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1704 PetscErrorCode ierr; 1705 IS nis; 1706 const PetscInt *idxs; 1707 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1708 PetscBool *ld; 1709 1710 PetscFunctionBegin; 1711 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1712 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 1713 if (mop == MPI_LAND) { 1714 /* init rootdata with true */ 1715 ld = (PetscBool*) matis->sf_rootdata; 1716 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1717 } else { 1718 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1719 } 1720 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1721 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1722 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1723 ld = (PetscBool*) matis->sf_leafdata; 1724 for (i=0;i<nd;i++) 1725 if (-1 < idxs[i] && idxs[i] < n) 1726 ld[idxs[i]] = PETSC_TRUE; 1727 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1728 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1729 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1730 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1731 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1732 if (mop == MPI_LAND) { 1733 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1734 } else { 1735 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1736 } 1737 for (i=0,nnd=0;i<n;i++) 1738 if (ld[i]) 1739 nidxs[nnd++] = i; 1740 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1741 ierr = ISDestroy(is);CHKERRQ(ierr); 1742 *is = nis; 1743 PetscFunctionReturn(0); 1744 } 1745 1746 #undef __FUNCT__ 1747 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 1748 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1749 { 1750 PC_IS *pcis = (PC_IS*)(pc->data); 1751 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1752 PetscErrorCode ierr; 1753 1754 PetscFunctionBegin; 1755 if (!pcbddc->benign_have_null) { 1756 PetscFunctionReturn(0); 1757 } 1758 if (pcbddc->ChangeOfBasisMatrix) { 1759 Vec swap; 1760 1761 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1762 swap = pcbddc->work_change; 1763 pcbddc->work_change = r; 1764 r = swap; 1765 } 1766 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1767 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1768 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1769 ierr = VecSet(z,0.);CHKERRQ(ierr); 1770 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1771 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1772 if (pcbddc->ChangeOfBasisMatrix) { 1773 pcbddc->work_change = r; 1774 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1775 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1776 } 1777 PetscFunctionReturn(0); 1778 } 1779 1780 #undef __FUNCT__ 1781 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 1782 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1783 { 1784 PCBDDCBenignMatMult_ctx ctx; 1785 PetscErrorCode ierr; 1786 PetscBool apply_right,apply_left,reset_x; 1787 1788 PetscFunctionBegin; 1789 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1790 if (transpose) { 1791 apply_right = ctx->apply_left; 1792 apply_left = ctx->apply_right; 1793 } else { 1794 apply_right = ctx->apply_right; 1795 apply_left = ctx->apply_left; 1796 } 1797 reset_x = PETSC_FALSE; 1798 if (apply_right) { 1799 const PetscScalar *ax; 1800 PetscInt nl,i; 1801 1802 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1803 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1804 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1805 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1806 for (i=0;i<ctx->benign_n;i++) { 1807 PetscScalar sum,val; 1808 const PetscInt *idxs; 1809 PetscInt nz,j; 1810 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1811 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1812 sum = 0.; 1813 if (ctx->apply_p0) { 1814 val = ctx->work[idxs[nz-1]]; 1815 for (j=0;j<nz-1;j++) { 1816 sum += ctx->work[idxs[j]]; 1817 ctx->work[idxs[j]] += val; 1818 } 1819 } else { 1820 for (j=0;j<nz-1;j++) { 1821 sum += ctx->work[idxs[j]]; 1822 } 1823 } 1824 ctx->work[idxs[nz-1]] -= sum; 1825 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1826 } 1827 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1828 reset_x = PETSC_TRUE; 1829 } 1830 if (transpose) { 1831 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1832 } else { 1833 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1834 } 1835 if (reset_x) { 1836 ierr = VecResetArray(x);CHKERRQ(ierr); 1837 } 1838 if (apply_left) { 1839 PetscScalar *ay; 1840 PetscInt i; 1841 1842 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1843 for (i=0;i<ctx->benign_n;i++) { 1844 PetscScalar sum,val; 1845 const PetscInt *idxs; 1846 PetscInt nz,j; 1847 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1848 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1849 val = -ay[idxs[nz-1]]; 1850 if (ctx->apply_p0) { 1851 sum = 0.; 1852 for (j=0;j<nz-1;j++) { 1853 sum += ay[idxs[j]]; 1854 ay[idxs[j]] += val; 1855 } 1856 ay[idxs[nz-1]] += sum; 1857 } else { 1858 for (j=0;j<nz-1;j++) { 1859 ay[idxs[j]] += val; 1860 } 1861 ay[idxs[nz-1]] = 0.; 1862 } 1863 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1864 } 1865 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1866 } 1867 PetscFunctionReturn(0); 1868 } 1869 1870 #undef __FUNCT__ 1871 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 1872 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1873 { 1874 PetscErrorCode ierr; 1875 1876 PetscFunctionBegin; 1877 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1878 PetscFunctionReturn(0); 1879 } 1880 1881 #undef __FUNCT__ 1882 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 1883 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1884 { 1885 PetscErrorCode ierr; 1886 1887 PetscFunctionBegin; 1888 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1889 PetscFunctionReturn(0); 1890 } 1891 1892 #undef __FUNCT__ 1893 #define __FUNCT__ "PCBDDCBenignShellMat" 1894 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1895 { 1896 PC_IS *pcis = (PC_IS*)pc->data; 1897 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1898 PCBDDCBenignMatMult_ctx ctx; 1899 PetscErrorCode ierr; 1900 1901 PetscFunctionBegin; 1902 if (!restore) { 1903 Mat A_IB,A_BI; 1904 PetscScalar *work; 1905 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1906 1907 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1908 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1909 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1910 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1911 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1912 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1913 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1914 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1915 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1916 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1917 ctx->apply_left = PETSC_TRUE; 1918 ctx->apply_right = PETSC_FALSE; 1919 ctx->apply_p0 = PETSC_FALSE; 1920 ctx->benign_n = pcbddc->benign_n; 1921 if (reuse) { 1922 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1923 ctx->free = PETSC_FALSE; 1924 } else { /* TODO: could be optimized for successive solves */ 1925 ISLocalToGlobalMapping N_to_D; 1926 PetscInt i; 1927 1928 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1929 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1930 for (i=0;i<pcbddc->benign_n;i++) { 1931 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1932 } 1933 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1934 ctx->free = PETSC_TRUE; 1935 } 1936 ctx->A = pcis->A_IB; 1937 ctx->work = work; 1938 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1939 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1940 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1941 pcis->A_IB = A_IB; 1942 1943 /* A_BI as A_IB^T */ 1944 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1945 pcbddc->benign_original_mat = pcis->A_BI; 1946 pcis->A_BI = A_BI; 1947 } else { 1948 if (!pcbddc->benign_original_mat) { 1949 PetscFunctionReturn(0); 1950 } 1951 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1952 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1953 pcis->A_IB = ctx->A; 1954 ctx->A = NULL; 1955 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1956 pcis->A_BI = pcbddc->benign_original_mat; 1957 pcbddc->benign_original_mat = NULL; 1958 if (ctx->free) { 1959 PetscInt i; 1960 for (i=0;i<ctx->benign_n;i++) { 1961 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1962 } 1963 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1964 } 1965 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1966 ierr = PetscFree(ctx);CHKERRQ(ierr); 1967 } 1968 PetscFunctionReturn(0); 1969 } 1970 1971 /* used just in bddc debug mode */ 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "PCBDDCBenignProject" 1974 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1975 { 1976 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1977 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1978 Mat An; 1979 PetscErrorCode ierr; 1980 1981 PetscFunctionBegin; 1982 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1983 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1984 if (is1) { 1985 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1986 ierr = MatDestroy(&An);CHKERRQ(ierr); 1987 } else { 1988 *B = An; 1989 } 1990 PetscFunctionReturn(0); 1991 } 1992 1993 /* TODO: add reuse flag */ 1994 #undef __FUNCT__ 1995 #define __FUNCT__ "MatSeqAIJCompress" 1996 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1997 { 1998 Mat Bt; 1999 PetscScalar *a,*bdata; 2000 const PetscInt *ii,*ij; 2001 PetscInt m,n,i,nnz,*bii,*bij; 2002 PetscBool flg_row; 2003 PetscErrorCode ierr; 2004 2005 PetscFunctionBegin; 2006 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2007 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2008 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2009 nnz = n; 2010 for (i=0;i<ii[n];i++) { 2011 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2012 } 2013 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2014 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2015 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2016 nnz = 0; 2017 bii[0] = 0; 2018 for (i=0;i<n;i++) { 2019 PetscInt j; 2020 for (j=ii[i];j<ii[i+1];j++) { 2021 PetscScalar entry = a[j]; 2022 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 2023 bij[nnz] = ij[j]; 2024 bdata[nnz] = entry; 2025 nnz++; 2026 } 2027 } 2028 bii[i+1] = nnz; 2029 } 2030 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2031 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2032 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2033 { 2034 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2035 b->free_a = PETSC_TRUE; 2036 b->free_ij = PETSC_TRUE; 2037 } 2038 *B = Bt; 2039 PetscFunctionReturn(0); 2040 } 2041 2042 #undef __FUNCT__ 2043 #define __FUNCT__ "MatDetectDisconnectedComponents" 2044 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 2045 { 2046 Mat B; 2047 IS is_dummy,*cc_n; 2048 ISLocalToGlobalMapping l2gmap_dummy; 2049 PCBDDCGraph graph; 2050 PetscInt i,n; 2051 PetscInt *xadj,*adjncy; 2052 PetscInt *xadj_filtered,*adjncy_filtered; 2053 PetscBool flg_row,isseqaij; 2054 PetscErrorCode ierr; 2055 2056 PetscFunctionBegin; 2057 if (!A->rmap->N || !A->cmap->N) { 2058 *ncc = 0; 2059 *cc = NULL; 2060 PetscFunctionReturn(0); 2061 } 2062 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2063 if (!isseqaij && filter) { 2064 PetscBool isseqdense; 2065 2066 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2067 if (!isseqdense) { 2068 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2069 } else { /* TODO: rectangular case and LDA */ 2070 PetscScalar *array; 2071 PetscReal chop=1.e-6; 2072 2073 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2074 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2075 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2076 for (i=0;i<n;i++) { 2077 PetscInt j; 2078 for (j=i+1;j<n;j++) { 2079 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2080 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2081 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2082 } 2083 } 2084 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2085 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2086 } 2087 } else { 2088 B = A; 2089 } 2090 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2091 2092 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2093 if (filter) { 2094 PetscScalar *data; 2095 PetscInt j,cum; 2096 2097 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2098 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2099 cum = 0; 2100 for (i=0;i<n;i++) { 2101 PetscInt t; 2102 2103 for (j=xadj[i];j<xadj[i+1];j++) { 2104 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2105 continue; 2106 } 2107 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2108 } 2109 t = xadj_filtered[i]; 2110 xadj_filtered[i] = cum; 2111 cum += t; 2112 } 2113 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2114 } else { 2115 xadj_filtered = NULL; 2116 adjncy_filtered = NULL; 2117 } 2118 2119 /* compute local connected components using PCBDDCGraph */ 2120 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2121 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2122 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2123 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2124 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2125 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2126 if (xadj_filtered) { 2127 graph->xadj = xadj_filtered; 2128 graph->adjncy = adjncy_filtered; 2129 } else { 2130 graph->xadj = xadj; 2131 graph->adjncy = adjncy; 2132 } 2133 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2134 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2135 /* partial clean up */ 2136 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2137 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2138 if (A != B) { 2139 ierr = MatDestroy(&B);CHKERRQ(ierr); 2140 } 2141 2142 /* get back data */ 2143 if (ncc) *ncc = graph->ncc; 2144 if (cc) { 2145 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2146 for (i=0;i<graph->ncc;i++) { 2147 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2148 } 2149 *cc = cc_n; 2150 } 2151 /* clean up graph */ 2152 graph->xadj = 0; 2153 graph->adjncy = 0; 2154 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2155 PetscFunctionReturn(0); 2156 } 2157 2158 #undef __FUNCT__ 2159 #define __FUNCT__ "PCBDDCBenignCheck" 2160 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2161 { 2162 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2163 PC_IS* pcis = (PC_IS*)(pc->data); 2164 IS dirIS = NULL; 2165 PetscInt i; 2166 PetscErrorCode ierr; 2167 2168 PetscFunctionBegin; 2169 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2170 if (zerodiag) { 2171 Mat A; 2172 Vec vec3_N; 2173 PetscScalar *vals; 2174 const PetscInt *idxs; 2175 PetscInt nz,*count; 2176 2177 /* p0 */ 2178 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2179 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2180 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2181 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2182 for (i=0;i<nz;i++) vals[i] = 1.; 2183 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2184 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2185 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2186 /* v_I */ 2187 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2188 for (i=0;i<nz;i++) vals[i] = 0.; 2189 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2190 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2191 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2192 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2193 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2194 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2195 if (dirIS) { 2196 PetscInt n; 2197 2198 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2199 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2200 for (i=0;i<n;i++) vals[i] = 0.; 2201 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2202 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2203 } 2204 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2205 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2206 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2207 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2208 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2209 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2210 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2211 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2212 ierr = PetscFree(vals);CHKERRQ(ierr); 2213 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2214 2215 /* there should not be any pressure dofs lying on the interface */ 2216 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2217 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2218 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2219 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2220 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2221 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2222 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2223 ierr = PetscFree(count);CHKERRQ(ierr); 2224 } 2225 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2226 2227 /* check PCBDDCBenignGetOrSetP0 */ 2228 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2229 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2230 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2231 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2232 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2233 for (i=0;i<pcbddc->benign_n;i++) { 2234 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2235 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 2236 } 2237 PetscFunctionReturn(0); 2238 } 2239 2240 #undef __FUNCT__ 2241 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 2242 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2243 { 2244 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2245 IS pressures,zerodiag,zerodiag_save,*zerodiag_subs; 2246 PetscInt nz,n; 2247 PetscInt *interior_dofs,n_interior_dofs,nneu; 2248 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2249 PetscErrorCode ierr; 2250 2251 PetscFunctionBegin; 2252 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2253 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2254 for (n=0;n<pcbddc->benign_n;n++) { 2255 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2256 } 2257 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2258 pcbddc->benign_n = 0; 2259 /* if a local info on dofs is present, assumes that the last field represents "pressures" 2260 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2261 Checks if all the pressure dofs in each subdomain have a zero diagonal 2262 If not, a change of basis on pressures is not needed 2263 since the local Schur complements are already SPD 2264 */ 2265 has_null_pressures = PETSC_TRUE; 2266 have_null = PETSC_TRUE; 2267 if (pcbddc->n_ISForDofsLocal) { 2268 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2269 2270 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2271 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2272 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2273 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2274 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2275 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2276 if (!sorted) { 2277 ierr = ISSort(pressures);CHKERRQ(ierr); 2278 } 2279 } else { 2280 pressures = NULL; 2281 } 2282 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2283 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2284 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2285 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2286 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2287 if (!sorted) { 2288 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2289 } 2290 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2291 zerodiag_save = zerodiag; 2292 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2293 if (!nz) { 2294 if (n) have_null = PETSC_FALSE; 2295 has_null_pressures = PETSC_FALSE; 2296 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2297 } 2298 recompute_zerodiag = PETSC_FALSE; 2299 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2300 zerodiag_subs = NULL; 2301 pcbddc->benign_n = 0; 2302 n_interior_dofs = 0; 2303 interior_dofs = NULL; 2304 nneu = 0; 2305 if (pcbddc->NeumannBoundariesLocal) { 2306 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2307 } 2308 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2309 if (checkb) { /* need to compute interior nodes */ 2310 PetscInt n,i,j; 2311 PetscInt n_neigh,*neigh,*n_shared,**shared; 2312 PetscInt *iwork; 2313 2314 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2315 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2316 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2317 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2318 for (i=1;i<n_neigh;i++) 2319 for (j=0;j<n_shared[i];j++) 2320 iwork[shared[i][j]] += 1; 2321 for (i=0;i<n;i++) 2322 if (!iwork[i]) 2323 interior_dofs[n_interior_dofs++] = i; 2324 ierr = PetscFree(iwork);CHKERRQ(ierr); 2325 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2326 } 2327 if (has_null_pressures) { 2328 IS *subs; 2329 PetscInt nsubs,i,j,nl; 2330 const PetscInt *idxs; 2331 PetscScalar *array; 2332 Vec *work; 2333 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2334 2335 subs = pcbddc->local_subs; 2336 nsubs = pcbddc->n_local_subs; 2337 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2338 if (checkb) { 2339 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2340 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2341 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2342 /* work[0] = 1_p */ 2343 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2344 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2345 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2346 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2347 /* work[0] = 1_v */ 2348 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2349 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2350 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2351 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2352 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2353 } 2354 if (nsubs > 1) { 2355 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2356 for (i=0;i<nsubs;i++) { 2357 ISLocalToGlobalMapping l2g; 2358 IS t_zerodiag_subs; 2359 PetscInt nl; 2360 2361 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2362 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2363 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2364 if (nl) { 2365 PetscBool valid = PETSC_TRUE; 2366 2367 if (checkb) { 2368 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2369 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2370 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2371 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2372 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2373 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2374 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2375 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2376 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2377 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2378 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2379 for (j=0;j<n_interior_dofs;j++) { 2380 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2381 valid = PETSC_FALSE; 2382 break; 2383 } 2384 } 2385 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2386 } 2387 if (valid && nneu) { 2388 const PetscInt *idxs; 2389 PetscInt nzb; 2390 2391 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2392 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2393 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2394 if (nzb) valid = PETSC_FALSE; 2395 } 2396 if (valid && pressures) { 2397 IS t_pressure_subs; 2398 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2399 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2400 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2401 } 2402 if (valid) { 2403 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2404 pcbddc->benign_n++; 2405 } else { 2406 recompute_zerodiag = PETSC_TRUE; 2407 } 2408 } 2409 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2410 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2411 } 2412 } else { /* there's just one subdomain (or zero if they have not been detected */ 2413 PetscBool valid = PETSC_TRUE; 2414 2415 if (nneu) valid = PETSC_FALSE; 2416 if (valid && pressures) { 2417 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2418 } 2419 if (valid && checkb) { 2420 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2421 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2422 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2423 for (j=0;j<n_interior_dofs;j++) { 2424 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2425 valid = PETSC_FALSE; 2426 break; 2427 } 2428 } 2429 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2430 } 2431 if (valid) { 2432 pcbddc->benign_n = 1; 2433 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2434 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2435 zerodiag_subs[0] = zerodiag; 2436 } 2437 } 2438 if (checkb) { 2439 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2440 } 2441 } 2442 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2443 2444 if (!pcbddc->benign_n) { 2445 PetscInt n; 2446 2447 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2448 recompute_zerodiag = PETSC_FALSE; 2449 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2450 if (n) { 2451 has_null_pressures = PETSC_FALSE; 2452 have_null = PETSC_FALSE; 2453 } 2454 } 2455 2456 /* final check for null pressures */ 2457 if (zerodiag && pressures) { 2458 PetscInt nz,np; 2459 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2460 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2461 if (nz != np) have_null = PETSC_FALSE; 2462 } 2463 2464 if (recompute_zerodiag) { 2465 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2466 if (pcbddc->benign_n == 1) { 2467 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2468 zerodiag = zerodiag_subs[0]; 2469 } else { 2470 PetscInt i,nzn,*new_idxs; 2471 2472 nzn = 0; 2473 for (i=0;i<pcbddc->benign_n;i++) { 2474 PetscInt ns; 2475 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2476 nzn += ns; 2477 } 2478 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2479 nzn = 0; 2480 for (i=0;i<pcbddc->benign_n;i++) { 2481 PetscInt ns,*idxs; 2482 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2483 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2484 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2485 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2486 nzn += ns; 2487 } 2488 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2489 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2490 } 2491 have_null = PETSC_FALSE; 2492 } 2493 2494 /* Prepare matrix to compute no-net-flux */ 2495 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2496 Mat A,loc_divudotp; 2497 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2498 IS row,col,isused = NULL; 2499 PetscInt M,N,n,st,n_isused; 2500 2501 if (pressures) { 2502 isused = pressures; 2503 } else { 2504 isused = zerodiag_save; 2505 } 2506 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2507 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2508 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2509 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2510 n_isused = 0; 2511 if (isused) { 2512 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2513 } 2514 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2515 st = st-n_isused; 2516 if (n) { 2517 const PetscInt *gidxs; 2518 2519 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2520 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2521 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2522 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2523 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2524 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2525 } else { 2526 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2527 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2528 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2529 } 2530 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2531 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2532 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2533 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2534 ierr = ISDestroy(&row);CHKERRQ(ierr); 2535 ierr = ISDestroy(&col);CHKERRQ(ierr); 2536 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2537 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2538 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2539 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2540 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2541 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2542 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2543 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2544 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2545 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2546 } 2547 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2548 2549 /* change of basis and p0 dofs */ 2550 if (has_null_pressures) { 2551 IS zerodiagc; 2552 const PetscInt *idxs,*idxsc; 2553 PetscInt i,s,*nnz; 2554 2555 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2556 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2557 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2558 /* local change of basis for pressures */ 2559 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2560 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2561 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2562 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2563 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2564 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2565 for (i=0;i<pcbddc->benign_n;i++) { 2566 PetscInt nzs,j; 2567 2568 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2569 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2570 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2571 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2572 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2573 } 2574 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2575 ierr = PetscFree(nnz);CHKERRQ(ierr); 2576 /* set identity on velocities */ 2577 for (i=0;i<n-nz;i++) { 2578 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2579 } 2580 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2581 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2582 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2583 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2584 /* set change on pressures */ 2585 for (s=0;s<pcbddc->benign_n;s++) { 2586 PetscScalar *array; 2587 PetscInt nzs; 2588 2589 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2590 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2591 for (i=0;i<nzs-1;i++) { 2592 PetscScalar vals[2]; 2593 PetscInt cols[2]; 2594 2595 cols[0] = idxs[i]; 2596 cols[1] = idxs[nzs-1]; 2597 vals[0] = 1.; 2598 vals[1] = 1.; 2599 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2600 } 2601 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2602 for (i=0;i<nzs-1;i++) array[i] = -1.; 2603 array[nzs-1] = 1.; 2604 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2605 /* store local idxs for p0 */ 2606 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2607 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2608 ierr = PetscFree(array);CHKERRQ(ierr); 2609 } 2610 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2611 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2612 /* project if needed */ 2613 if (pcbddc->benign_change_explicit) { 2614 Mat M; 2615 2616 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2617 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2618 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2619 ierr = MatDestroy(&M);CHKERRQ(ierr); 2620 } 2621 /* store global idxs for p0 */ 2622 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2623 } 2624 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2625 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2626 2627 /* determines if the coarse solver will be singular or not */ 2628 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2629 /* determines if the problem has subdomains with 0 pressure block */ 2630 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2631 *zerodiaglocal = zerodiag; 2632 PetscFunctionReturn(0); 2633 } 2634 2635 #undef __FUNCT__ 2636 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 2637 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2638 { 2639 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2640 PetscScalar *array; 2641 PetscErrorCode ierr; 2642 2643 PetscFunctionBegin; 2644 if (!pcbddc->benign_sf) { 2645 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2646 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2647 } 2648 if (get) { 2649 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2650 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2651 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2652 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2653 } else { 2654 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2655 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2656 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2657 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2658 } 2659 PetscFunctionReturn(0); 2660 } 2661 2662 #undef __FUNCT__ 2663 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 2664 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2665 { 2666 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2667 PetscErrorCode ierr; 2668 2669 PetscFunctionBegin; 2670 /* TODO: add error checking 2671 - avoid nested pop (or push) calls. 2672 - cannot push before pop. 2673 - cannot call this if pcbddc->local_mat is NULL 2674 */ 2675 if (!pcbddc->benign_n) { 2676 PetscFunctionReturn(0); 2677 } 2678 if (pop) { 2679 if (pcbddc->benign_change_explicit) { 2680 IS is_p0; 2681 MatReuse reuse; 2682 2683 /* extract B_0 */ 2684 reuse = MAT_INITIAL_MATRIX; 2685 if (pcbddc->benign_B0) { 2686 reuse = MAT_REUSE_MATRIX; 2687 } 2688 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2689 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2690 /* remove rows and cols from local problem */ 2691 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2692 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2693 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2694 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2695 } else { 2696 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2697 PetscScalar *vals; 2698 PetscInt i,n,*idxs_ins; 2699 2700 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2701 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2702 if (!pcbddc->benign_B0) { 2703 PetscInt *nnz; 2704 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2705 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2706 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2707 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2708 for (i=0;i<pcbddc->benign_n;i++) { 2709 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2710 nnz[i] = n - nnz[i]; 2711 } 2712 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2713 ierr = PetscFree(nnz);CHKERRQ(ierr); 2714 } 2715 2716 for (i=0;i<pcbddc->benign_n;i++) { 2717 PetscScalar *array; 2718 PetscInt *idxs,j,nz,cum; 2719 2720 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2721 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2722 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2723 for (j=0;j<nz;j++) vals[j] = 1.; 2724 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2725 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2726 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2727 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2728 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2729 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2730 cum = 0; 2731 for (j=0;j<n;j++) { 2732 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2733 vals[cum] = array[j]; 2734 idxs_ins[cum] = j; 2735 cum++; 2736 } 2737 } 2738 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2739 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2740 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2741 } 2742 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2743 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2744 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2745 } 2746 } else { /* push */ 2747 if (pcbddc->benign_change_explicit) { 2748 PetscInt i; 2749 2750 for (i=0;i<pcbddc->benign_n;i++) { 2751 PetscScalar *B0_vals; 2752 PetscInt *B0_cols,B0_ncol; 2753 2754 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2755 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2756 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2757 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2758 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2759 } 2760 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2761 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2762 } else { 2763 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2764 } 2765 } 2766 PetscFunctionReturn(0); 2767 } 2768 2769 #undef __FUNCT__ 2770 #define __FUNCT__ "PCBDDCAdaptiveSelection" 2771 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2772 { 2773 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2774 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2775 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2776 PetscBLASInt *B_iwork,*B_ifail; 2777 PetscScalar *work,lwork; 2778 PetscScalar *St,*S,*eigv; 2779 PetscScalar *Sarray,*Starray; 2780 PetscReal *eigs,thresh; 2781 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2782 PetscBool allocated_S_St; 2783 #if defined(PETSC_USE_COMPLEX) 2784 PetscReal *rwork; 2785 #endif 2786 PetscErrorCode ierr; 2787 2788 PetscFunctionBegin; 2789 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2790 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2791 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2792 2793 if (pcbddc->dbg_flag) { 2794 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2795 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2796 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2797 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2798 } 2799 2800 if (pcbddc->dbg_flag) { 2801 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2802 } 2803 2804 /* max size of subsets */ 2805 mss = 0; 2806 for (i=0;i<sub_schurs->n_subs;i++) { 2807 PetscInt subset_size; 2808 2809 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2810 mss = PetscMax(mss,subset_size); 2811 } 2812 2813 /* min/max and threshold */ 2814 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2815 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2816 nmax = PetscMax(nmin,nmax); 2817 allocated_S_St = PETSC_FALSE; 2818 if (nmin) { 2819 allocated_S_St = PETSC_TRUE; 2820 } 2821 2822 /* allocate lapack workspace */ 2823 cum = cum2 = 0; 2824 maxneigs = 0; 2825 for (i=0;i<sub_schurs->n_subs;i++) { 2826 PetscInt n,subset_size; 2827 2828 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2829 n = PetscMin(subset_size,nmax); 2830 cum += subset_size; 2831 cum2 += subset_size*n; 2832 maxneigs = PetscMax(maxneigs,n); 2833 } 2834 if (mss) { 2835 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2836 PetscBLASInt B_itype = 1; 2837 PetscBLASInt B_N = mss; 2838 PetscReal zero = 0.0; 2839 PetscReal eps = 0.0; /* dlamch? */ 2840 2841 B_lwork = -1; 2842 S = NULL; 2843 St = NULL; 2844 eigs = NULL; 2845 eigv = NULL; 2846 B_iwork = NULL; 2847 B_ifail = NULL; 2848 #if defined(PETSC_USE_COMPLEX) 2849 rwork = NULL; 2850 #endif 2851 thresh = 1.0; 2852 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2853 #if defined(PETSC_USE_COMPLEX) 2854 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2855 #else 2856 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2857 #endif 2858 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2859 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2860 } else { 2861 /* TODO */ 2862 } 2863 } else { 2864 lwork = 0; 2865 } 2866 2867 nv = 0; 2868 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2869 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2870 } 2871 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2872 if (allocated_S_St) { 2873 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2874 } 2875 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2876 #if defined(PETSC_USE_COMPLEX) 2877 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2878 #endif 2879 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2880 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2881 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2882 nv+cum,&pcbddc->adaptive_constraints_idxs, 2883 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2884 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2885 2886 maxneigs = 0; 2887 cum = cumarray = 0; 2888 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2889 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2890 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2891 const PetscInt *idxs; 2892 2893 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2894 for (cum=0;cum<nv;cum++) { 2895 pcbddc->adaptive_constraints_n[cum] = 1; 2896 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2897 pcbddc->adaptive_constraints_data[cum] = 1.0; 2898 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2899 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2900 } 2901 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2902 } 2903 2904 if (mss) { /* multilevel */ 2905 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2906 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2907 } 2908 2909 thresh = pcbddc->adaptive_threshold; 2910 for (i=0;i<sub_schurs->n_subs;i++) { 2911 const PetscInt *idxs; 2912 PetscReal upper,lower; 2913 PetscInt j,subset_size,eigs_start = 0; 2914 PetscBLASInt B_N; 2915 PetscBool same_data = PETSC_FALSE; 2916 2917 if (pcbddc->use_deluxe_scaling) { 2918 upper = PETSC_MAX_REAL; 2919 lower = thresh; 2920 } else { 2921 upper = 1./thresh; 2922 lower = 0.; 2923 } 2924 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2925 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2926 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2927 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2928 if (sub_schurs->is_hermitian) { 2929 PetscInt j,k; 2930 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2931 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2932 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2933 } 2934 for (j=0;j<subset_size;j++) { 2935 for (k=j;k<subset_size;k++) { 2936 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2937 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2938 } 2939 } 2940 } else { 2941 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2942 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2943 } 2944 } else { 2945 S = Sarray + cumarray; 2946 St = Starray + cumarray; 2947 } 2948 /* see if we can save some work */ 2949 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2950 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2951 } 2952 2953 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2954 B_neigs = 0; 2955 } else { 2956 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2957 PetscBLASInt B_itype = 1; 2958 PetscBLASInt B_IL, B_IU; 2959 PetscReal eps = -1.0; /* dlamch? */ 2960 PetscInt nmin_s; 2961 PetscBool compute_range = PETSC_FALSE; 2962 2963 if (pcbddc->dbg_flag) { 2964 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]]); 2965 } 2966 2967 compute_range = PETSC_FALSE; 2968 if (thresh > 1.+PETSC_SMALL && !same_data) { 2969 compute_range = PETSC_TRUE; 2970 } 2971 2972 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2973 if (compute_range) { 2974 2975 /* ask for eigenvalues larger than thresh */ 2976 #if defined(PETSC_USE_COMPLEX) 2977 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2978 #else 2979 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2980 #endif 2981 } else if (!same_data) { 2982 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2983 B_IL = 1; 2984 #if defined(PETSC_USE_COMPLEX) 2985 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2986 #else 2987 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2988 #endif 2989 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 2990 PetscInt k; 2991 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2992 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2993 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2994 nmin = nmax; 2995 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2996 for (k=0;k<nmax;k++) { 2997 eigs[k] = 1./PETSC_SMALL; 2998 eigv[k*(subset_size+1)] = 1.0; 2999 } 3000 } 3001 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3002 if (B_ierr) { 3003 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3004 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3005 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3006 } 3007 3008 if (B_neigs > nmax) { 3009 if (pcbddc->dbg_flag) { 3010 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 3011 } 3012 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 3013 B_neigs = nmax; 3014 } 3015 3016 nmin_s = PetscMin(nmin,B_N); 3017 if (B_neigs < nmin_s) { 3018 PetscBLASInt B_neigs2; 3019 3020 if (pcbddc->use_deluxe_scaling) { 3021 B_IL = B_N - nmin_s + 1; 3022 B_IU = B_N - B_neigs; 3023 } else { 3024 B_IL = B_neigs + 1; 3025 B_IU = nmin_s; 3026 } 3027 if (pcbddc->dbg_flag) { 3028 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 3029 } 3030 if (sub_schurs->is_hermitian) { 3031 PetscInt j,k; 3032 for (j=0;j<subset_size;j++) { 3033 for (k=j;k<subset_size;k++) { 3034 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3035 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3036 } 3037 } 3038 } else { 3039 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3040 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3041 } 3042 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3043 #if defined(PETSC_USE_COMPLEX) 3044 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3045 #else 3046 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3047 #endif 3048 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3049 B_neigs += B_neigs2; 3050 } 3051 if (B_ierr) { 3052 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3053 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3054 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3055 } 3056 if (pcbddc->dbg_flag) { 3057 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3058 for (j=0;j<B_neigs;j++) { 3059 if (eigs[j] == 0.0) { 3060 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3061 } else { 3062 if (pcbddc->use_deluxe_scaling) { 3063 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3064 } else { 3065 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3066 } 3067 } 3068 } 3069 } 3070 } else { 3071 /* TODO */ 3072 } 3073 } 3074 /* change the basis back to the original one */ 3075 if (sub_schurs->change) { 3076 Mat change,phi,phit; 3077 3078 if (pcbddc->dbg_flag > 1) { 3079 PetscInt ii; 3080 for (ii=0;ii<B_neigs;ii++) { 3081 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3082 for (j=0;j<B_N;j++) { 3083 #if defined(PETSC_USE_COMPLEX) 3084 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3085 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3086 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3087 #else 3088 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3089 #endif 3090 } 3091 } 3092 } 3093 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3094 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3095 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3096 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3097 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3098 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3099 } 3100 maxneigs = PetscMax(B_neigs,maxneigs); 3101 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3102 if (B_neigs) { 3103 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3104 3105 if (pcbddc->dbg_flag > 1) { 3106 PetscInt ii; 3107 for (ii=0;ii<B_neigs;ii++) { 3108 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3109 for (j=0;j<B_N;j++) { 3110 #if defined(PETSC_USE_COMPLEX) 3111 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3112 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3113 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3114 #else 3115 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3116 #endif 3117 } 3118 } 3119 } 3120 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3121 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3122 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3123 cum++; 3124 } 3125 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3126 /* shift for next computation */ 3127 cumarray += subset_size*subset_size; 3128 } 3129 if (pcbddc->dbg_flag) { 3130 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3131 } 3132 3133 if (mss) { 3134 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3135 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3136 /* destroy matrices (junk) */ 3137 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3138 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3139 } 3140 if (allocated_S_St) { 3141 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3142 } 3143 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3144 #if defined(PETSC_USE_COMPLEX) 3145 ierr = PetscFree(rwork);CHKERRQ(ierr); 3146 #endif 3147 if (pcbddc->dbg_flag) { 3148 PetscInt maxneigs_r; 3149 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3150 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3151 } 3152 PetscFunctionReturn(0); 3153 } 3154 3155 #undef __FUNCT__ 3156 #define __FUNCT__ "PCBDDCSetUpSolvers" 3157 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3158 { 3159 PetscScalar *coarse_submat_vals; 3160 PetscErrorCode ierr; 3161 3162 PetscFunctionBegin; 3163 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3164 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3165 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3166 3167 /* Setup local neumann solver ksp_R */ 3168 /* PCBDDCSetUpLocalScatters should be called first! */ 3169 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3170 3171 /* 3172 Setup local correction and local part of coarse basis. 3173 Gives back the dense local part of the coarse matrix in column major ordering 3174 */ 3175 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3176 3177 /* Compute total number of coarse nodes and setup coarse solver */ 3178 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3179 3180 /* free */ 3181 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3182 PetscFunctionReturn(0); 3183 } 3184 3185 #undef __FUNCT__ 3186 #define __FUNCT__ "PCBDDCResetCustomization" 3187 PetscErrorCode PCBDDCResetCustomization(PC pc) 3188 { 3189 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3190 PetscErrorCode ierr; 3191 3192 PetscFunctionBegin; 3193 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3194 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3195 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3196 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3197 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3198 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3199 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3200 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3201 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3202 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3203 PetscFunctionReturn(0); 3204 } 3205 3206 #undef __FUNCT__ 3207 #define __FUNCT__ "PCBDDCResetTopography" 3208 PetscErrorCode PCBDDCResetTopography(PC pc) 3209 { 3210 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3211 PetscInt i; 3212 PetscErrorCode ierr; 3213 3214 PetscFunctionBegin; 3215 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3216 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3217 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3218 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3219 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3220 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3221 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3222 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3223 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3224 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3225 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3226 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3227 for (i=0;i<pcbddc->n_local_subs;i++) { 3228 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3229 } 3230 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3231 if (pcbddc->sub_schurs) { 3232 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 3233 } 3234 pcbddc->graphanalyzed = PETSC_FALSE; 3235 pcbddc->recompute_topography = PETSC_TRUE; 3236 PetscFunctionReturn(0); 3237 } 3238 3239 #undef __FUNCT__ 3240 #define __FUNCT__ "PCBDDCResetSolvers" 3241 PetscErrorCode PCBDDCResetSolvers(PC pc) 3242 { 3243 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3244 PetscErrorCode ierr; 3245 3246 PetscFunctionBegin; 3247 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3248 if (pcbddc->coarse_phi_B) { 3249 PetscScalar *array; 3250 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3251 ierr = PetscFree(array);CHKERRQ(ierr); 3252 } 3253 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3254 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3255 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3256 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3257 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3258 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3259 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3260 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3261 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3262 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3263 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3264 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3265 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3266 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3267 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 3268 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 3269 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 3270 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3271 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3272 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3273 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3274 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3275 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3276 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3277 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3278 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3279 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3280 if (pcbddc->benign_zerodiag_subs) { 3281 PetscInt i; 3282 for (i=0;i<pcbddc->benign_n;i++) { 3283 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3284 } 3285 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3286 } 3287 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3288 PetscFunctionReturn(0); 3289 } 3290 3291 #undef __FUNCT__ 3292 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 3293 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3294 { 3295 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3296 PC_IS *pcis = (PC_IS*)pc->data; 3297 VecType impVecType; 3298 PetscInt n_constraints,n_R,old_size; 3299 PetscErrorCode ierr; 3300 3301 PetscFunctionBegin; 3302 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3303 n_R = pcis->n - pcbddc->n_vertices; 3304 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3305 /* local work vectors (try to avoid unneeded work)*/ 3306 /* R nodes */ 3307 old_size = -1; 3308 if (pcbddc->vec1_R) { 3309 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3310 } 3311 if (n_R != old_size) { 3312 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3313 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3314 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3315 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3316 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3317 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3318 } 3319 /* local primal dofs */ 3320 old_size = -1; 3321 if (pcbddc->vec1_P) { 3322 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3323 } 3324 if (pcbddc->local_primal_size != old_size) { 3325 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3326 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3327 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3328 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3329 } 3330 /* local explicit constraints */ 3331 old_size = -1; 3332 if (pcbddc->vec1_C) { 3333 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3334 } 3335 if (n_constraints && n_constraints != old_size) { 3336 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3337 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3338 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3339 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3340 } 3341 PetscFunctionReturn(0); 3342 } 3343 3344 #undef __FUNCT__ 3345 #define __FUNCT__ "PCBDDCSetUpCorrection" 3346 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3347 { 3348 PetscErrorCode ierr; 3349 /* pointers to pcis and pcbddc */ 3350 PC_IS* pcis = (PC_IS*)pc->data; 3351 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3352 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3353 /* submatrices of local problem */ 3354 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3355 /* submatrices of local coarse problem */ 3356 Mat S_VV,S_CV,S_VC,S_CC; 3357 /* working matrices */ 3358 Mat C_CR; 3359 /* additional working stuff */ 3360 PC pc_R; 3361 Mat F; 3362 Vec dummy_vec; 3363 PetscBool isLU,isCHOL,isILU,need_benign_correction; 3364 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3365 PetscScalar *work; 3366 PetscInt *idx_V_B; 3367 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3368 PetscInt i,n_R,n_D,n_B; 3369 3370 /* some shortcuts to scalars */ 3371 PetscScalar one=1.0,m_one=-1.0; 3372 3373 PetscFunctionBegin; 3374 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3375 3376 /* Set Non-overlapping dimensions */ 3377 n_vertices = pcbddc->n_vertices; 3378 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3379 n_B = pcis->n_B; 3380 n_D = pcis->n - n_B; 3381 n_R = pcis->n - n_vertices; 3382 3383 /* vertices in boundary numbering */ 3384 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3385 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3386 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3387 3388 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3389 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3390 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3391 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3392 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3393 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3394 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3395 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3396 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3397 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3398 3399 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3400 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3401 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3402 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3403 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3404 lda_rhs = n_R; 3405 need_benign_correction = PETSC_FALSE; 3406 if (isLU || isILU || isCHOL) { 3407 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3408 } else if (sub_schurs && sub_schurs->reuse_solver) { 3409 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3410 MatFactorType type; 3411 3412 F = reuse_solver->F; 3413 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3414 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3415 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3416 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3417 } else { 3418 F = NULL; 3419 } 3420 3421 /* allocate workspace */ 3422 n = 0; 3423 if (n_constraints) { 3424 n += lda_rhs*n_constraints; 3425 } 3426 if (n_vertices) { 3427 n = PetscMax(2*lda_rhs*n_vertices,n); 3428 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3429 } 3430 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3431 3432 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3433 dummy_vec = NULL; 3434 if (need_benign_correction && lda_rhs != n_R && F) { 3435 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3436 } 3437 3438 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3439 if (n_constraints) { 3440 Mat M1,M2,M3,C_B; 3441 IS is_aux; 3442 PetscScalar *array,*array2; 3443 3444 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3445 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3446 3447 /* Extract constraints on R nodes: C_{CR} */ 3448 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3449 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3450 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3451 3452 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3453 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3454 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3455 for (i=0;i<n_constraints;i++) { 3456 const PetscScalar *row_cmat_values; 3457 const PetscInt *row_cmat_indices; 3458 PetscInt size_of_constraint,j; 3459 3460 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3461 for (j=0;j<size_of_constraint;j++) { 3462 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3463 } 3464 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3465 } 3466 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3467 if (F) { 3468 Mat B; 3469 3470 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3471 if (need_benign_correction) { 3472 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3473 3474 /* rhs is already zero on interior dofs, no need to change the rhs */ 3475 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3476 } 3477 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 3478 if (need_benign_correction) { 3479 PetscScalar *marr; 3480 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3481 3482 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3483 if (lda_rhs != n_R) { 3484 for (i=0;i<n_constraints;i++) { 3485 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3486 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3487 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3488 } 3489 } else { 3490 for (i=0;i<n_constraints;i++) { 3491 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3492 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3493 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3494 } 3495 } 3496 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3497 } 3498 ierr = MatDestroy(&B);CHKERRQ(ierr); 3499 } else { 3500 PetscScalar *marr; 3501 3502 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3503 for (i=0;i<n_constraints;i++) { 3504 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3505 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3506 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3507 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3508 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3509 } 3510 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3511 } 3512 if (!pcbddc->switch_static) { 3513 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3514 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3515 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3516 for (i=0;i<n_constraints;i++) { 3517 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3518 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3519 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3520 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3521 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3522 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3523 } 3524 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3525 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3526 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3527 } else { 3528 if (lda_rhs != n_R) { 3529 IS dummy; 3530 3531 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3532 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3533 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3534 } else { 3535 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3536 pcbddc->local_auxmat2 = local_auxmat2_R; 3537 } 3538 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3539 } 3540 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3541 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3542 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3543 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3544 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3545 if (isCHOL) { 3546 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3547 } else { 3548 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3549 } 3550 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3551 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3552 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3553 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3554 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3555 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3556 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3557 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3558 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3559 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3560 } 3561 3562 /* Get submatrices from subdomain matrix */ 3563 if (n_vertices) { 3564 IS is_aux; 3565 3566 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3567 IS tis; 3568 3569 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3570 ierr = ISSort(tis);CHKERRQ(ierr); 3571 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3572 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3573 } else { 3574 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3575 } 3576 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3577 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3578 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3579 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3580 } 3581 3582 /* Matrix of coarse basis functions (local) */ 3583 if (pcbddc->coarse_phi_B) { 3584 PetscInt on_B,on_primal,on_D=n_D; 3585 if (pcbddc->coarse_phi_D) { 3586 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3587 } 3588 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3589 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3590 PetscScalar *marray; 3591 3592 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3593 ierr = PetscFree(marray);CHKERRQ(ierr); 3594 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3595 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3596 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3597 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3598 } 3599 } 3600 3601 if (!pcbddc->coarse_phi_B) { 3602 PetscScalar *marray; 3603 3604 n = n_B*pcbddc->local_primal_size; 3605 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3606 n += n_D*pcbddc->local_primal_size; 3607 } 3608 if (!pcbddc->symmetric_primal) { 3609 n *= 2; 3610 } 3611 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 3612 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3613 n = n_B*pcbddc->local_primal_size; 3614 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3615 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3616 n += n_D*pcbddc->local_primal_size; 3617 } 3618 if (!pcbddc->symmetric_primal) { 3619 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3620 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3621 n = n_B*pcbddc->local_primal_size; 3622 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3623 } 3624 } else { 3625 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3626 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3627 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3628 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3629 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3630 } 3631 } 3632 } 3633 3634 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3635 p0_lidx_I = NULL; 3636 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3637 const PetscInt *idxs; 3638 3639 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3640 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3641 for (i=0;i<pcbddc->benign_n;i++) { 3642 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3643 } 3644 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3645 } 3646 3647 /* vertices */ 3648 if (n_vertices) { 3649 3650 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3651 3652 if (n_R) { 3653 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3654 PetscBLASInt B_N,B_one = 1; 3655 PetscScalar *x,*y; 3656 PetscBool isseqaij; 3657 3658 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3659 if (need_benign_correction) { 3660 ISLocalToGlobalMapping RtoN; 3661 IS is_p0; 3662 PetscInt *idxs_p0,n; 3663 3664 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3665 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3666 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3667 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3668 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3669 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3670 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3671 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3672 } 3673 3674 if (lda_rhs == n_R) { 3675 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3676 } else { 3677 PetscScalar *av,*array; 3678 const PetscInt *xadj,*adjncy; 3679 PetscInt n; 3680 PetscBool flg_row; 3681 3682 array = work+lda_rhs*n_vertices; 3683 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3684 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3685 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3686 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3687 for (i=0;i<n;i++) { 3688 PetscInt j; 3689 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3690 } 3691 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3692 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3693 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3694 } 3695 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3696 if (need_benign_correction) { 3697 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3698 PetscScalar *marr; 3699 3700 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3701 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3702 3703 | 0 0 0 | (V) 3704 L = | 0 0 -1 | (P-p0) 3705 | 0 0 -1 | (p0) 3706 3707 */ 3708 for (i=0;i<reuse_solver->benign_n;i++) { 3709 const PetscScalar *vals; 3710 const PetscInt *idxs,*idxs_zero; 3711 PetscInt n,j,nz; 3712 3713 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3714 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3715 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3716 for (j=0;j<n;j++) { 3717 PetscScalar val = vals[j]; 3718 PetscInt k,col = idxs[j]; 3719 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3720 } 3721 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3722 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3723 } 3724 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3725 } 3726 if (F) { 3727 /* need to correct the rhs */ 3728 if (need_benign_correction) { 3729 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3730 PetscScalar *marr; 3731 3732 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3733 if (lda_rhs != n_R) { 3734 for (i=0;i<n_vertices;i++) { 3735 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3736 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3737 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3738 } 3739 } else { 3740 for (i=0;i<n_vertices;i++) { 3741 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3742 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3743 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3744 } 3745 } 3746 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3747 } 3748 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3749 /* need to correct the solution */ 3750 if (need_benign_correction) { 3751 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3752 PetscScalar *marr; 3753 3754 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3755 if (lda_rhs != n_R) { 3756 for (i=0;i<n_vertices;i++) { 3757 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3758 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3759 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3760 } 3761 } else { 3762 for (i=0;i<n_vertices;i++) { 3763 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3764 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3765 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3766 } 3767 } 3768 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3769 } 3770 } else { 3771 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3772 for (i=0;i<n_vertices;i++) { 3773 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3774 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3775 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3776 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3777 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3778 } 3779 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3780 } 3781 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3782 /* S_VV and S_CV */ 3783 if (n_constraints) { 3784 Mat B; 3785 3786 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3787 for (i=0;i<n_vertices;i++) { 3788 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3789 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3790 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3791 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3792 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3793 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3794 } 3795 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3796 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3797 ierr = MatDestroy(&B);CHKERRQ(ierr); 3798 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3799 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3800 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3801 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3802 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3803 ierr = MatDestroy(&B);CHKERRQ(ierr); 3804 } 3805 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3806 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3807 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3808 } 3809 if (lda_rhs != n_R) { 3810 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3811 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3812 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3813 } 3814 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3815 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3816 if (need_benign_correction) { 3817 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3818 PetscScalar *marr,*sums; 3819 3820 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3821 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 3822 for (i=0;i<reuse_solver->benign_n;i++) { 3823 const PetscScalar *vals; 3824 const PetscInt *idxs,*idxs_zero; 3825 PetscInt n,j,nz; 3826 3827 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3828 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3829 for (j=0;j<n_vertices;j++) { 3830 PetscInt k; 3831 sums[j] = 0.; 3832 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3833 } 3834 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3835 for (j=0;j<n;j++) { 3836 PetscScalar val = vals[j]; 3837 PetscInt k; 3838 for (k=0;k<n_vertices;k++) { 3839 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3840 } 3841 } 3842 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3843 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3844 } 3845 ierr = PetscFree(sums);CHKERRQ(ierr); 3846 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 3847 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3848 } 3849 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3850 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3851 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3852 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3853 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3854 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3855 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3856 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3857 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3858 } else { 3859 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3860 } 3861 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3862 3863 /* coarse basis functions */ 3864 for (i=0;i<n_vertices;i++) { 3865 PetscScalar *y; 3866 3867 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3868 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3869 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3870 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3871 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3872 y[n_B*i+idx_V_B[i]] = 1.0; 3873 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3874 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3875 3876 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3877 PetscInt j; 3878 3879 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3880 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3881 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3882 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3883 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3884 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3885 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3886 } 3887 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3888 } 3889 /* if n_R == 0 the object is not destroyed */ 3890 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3891 } 3892 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3893 3894 if (n_constraints) { 3895 Mat B; 3896 3897 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3898 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3899 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3900 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3901 if (n_vertices) { 3902 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3903 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3904 } else { 3905 Mat S_VCt; 3906 3907 if (lda_rhs != n_R) { 3908 ierr = MatDestroy(&B);CHKERRQ(ierr); 3909 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3910 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3911 } 3912 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3913 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3914 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3915 } 3916 } 3917 ierr = MatDestroy(&B);CHKERRQ(ierr); 3918 /* coarse basis functions */ 3919 for (i=0;i<n_constraints;i++) { 3920 PetscScalar *y; 3921 3922 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3923 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3924 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3925 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3926 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3927 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3928 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3929 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3930 PetscInt j; 3931 3932 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3933 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3934 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3935 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3936 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3937 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3938 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3939 } 3940 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3941 } 3942 } 3943 if (n_constraints) { 3944 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3945 } 3946 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3947 3948 /* coarse matrix entries relative to B_0 */ 3949 if (pcbddc->benign_n) { 3950 Mat B0_B,B0_BPHI; 3951 IS is_dummy; 3952 PetscScalar *data; 3953 PetscInt j; 3954 3955 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3956 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3957 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3958 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3959 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3960 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3961 for (j=0;j<pcbddc->benign_n;j++) { 3962 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3963 for (i=0;i<pcbddc->local_primal_size;i++) { 3964 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3965 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3966 } 3967 } 3968 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3969 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3970 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3971 } 3972 3973 /* compute other basis functions for non-symmetric problems */ 3974 if (!pcbddc->symmetric_primal) { 3975 Mat B_V=NULL,B_C=NULL; 3976 PetscScalar *marray; 3977 3978 if (n_constraints) { 3979 Mat S_CCT,C_CRT; 3980 3981 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 3982 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 3983 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 3984 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 3985 if (n_vertices) { 3986 Mat S_VCT; 3987 3988 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 3989 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 3990 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 3991 } 3992 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 3993 } else { 3994 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 3995 } 3996 if (n_vertices && n_R) { 3997 PetscScalar *av,*marray; 3998 const PetscInt *xadj,*adjncy; 3999 PetscInt n; 4000 PetscBool flg_row; 4001 4002 /* B_V = B_V - A_VR^T */ 4003 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4004 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4005 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4006 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4007 for (i=0;i<n;i++) { 4008 PetscInt j; 4009 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4010 } 4011 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4012 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4013 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4014 } 4015 4016 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4017 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4018 for (i=0;i<n_vertices;i++) { 4019 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4020 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4021 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4022 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4023 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4024 } 4025 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4026 if (B_C) { 4027 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4028 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4029 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4030 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4031 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4032 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4033 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4034 } 4035 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4036 } 4037 /* coarse basis functions */ 4038 for (i=0;i<pcbddc->local_primal_size;i++) { 4039 PetscScalar *y; 4040 4041 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4042 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4043 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4044 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4045 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4046 if (i<n_vertices) { 4047 y[n_B*i+idx_V_B[i]] = 1.0; 4048 } 4049 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4050 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4051 4052 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4053 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4054 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4055 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4056 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4057 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4058 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4059 } 4060 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4061 } 4062 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4063 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4064 } 4065 /* free memory */ 4066 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4067 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4068 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4069 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4070 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4071 ierr = PetscFree(work);CHKERRQ(ierr); 4072 if (n_vertices) { 4073 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4074 } 4075 if (n_constraints) { 4076 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4077 } 4078 /* Checking coarse_sub_mat and coarse basis functios */ 4079 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4080 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4081 if (pcbddc->dbg_flag) { 4082 Mat coarse_sub_mat; 4083 Mat AUXMAT,TM1,TM2,TM3,TM4; 4084 Mat coarse_phi_D,coarse_phi_B; 4085 Mat coarse_psi_D,coarse_psi_B; 4086 Mat A_II,A_BB,A_IB,A_BI; 4087 Mat C_B,CPHI; 4088 IS is_dummy; 4089 Vec mones; 4090 MatType checkmattype=MATSEQAIJ; 4091 PetscReal real_value; 4092 4093 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4094 Mat A; 4095 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4096 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4097 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4098 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4099 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4100 ierr = MatDestroy(&A);CHKERRQ(ierr); 4101 } else { 4102 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4103 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4104 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4105 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4106 } 4107 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4108 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4109 if (!pcbddc->symmetric_primal) { 4110 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4111 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4112 } 4113 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4114 4115 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4116 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4117 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4118 if (!pcbddc->symmetric_primal) { 4119 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4120 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4121 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4122 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4123 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4124 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4125 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4126 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4127 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4128 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4129 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4130 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4131 } else { 4132 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4133 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4134 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4135 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4136 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4137 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4138 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4139 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4140 } 4141 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4142 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4143 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4144 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4145 if (pcbddc->benign_n) { 4146 Mat B0_B,B0_BPHI; 4147 PetscScalar *data,*data2; 4148 PetscInt j; 4149 4150 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4151 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4152 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4153 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4154 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4155 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4156 for (j=0;j<pcbddc->benign_n;j++) { 4157 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4158 for (i=0;i<pcbddc->local_primal_size;i++) { 4159 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4160 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4161 } 4162 } 4163 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4164 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4165 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4166 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4167 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4168 } 4169 #if 0 4170 { 4171 PetscViewer viewer; 4172 char filename[256]; 4173 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4174 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4175 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4176 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4177 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4178 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4179 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4180 if (save_change) { 4181 Mat phi_B; 4182 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 4183 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 4184 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 4185 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 4186 } else { 4187 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4188 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4189 } 4190 if (pcbddc->coarse_phi_D) { 4191 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4192 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4193 } 4194 if (pcbddc->coarse_psi_B) { 4195 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4196 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4197 } 4198 if (pcbddc->coarse_psi_D) { 4199 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4200 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4201 } 4202 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4203 } 4204 #endif 4205 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4206 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4207 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4208 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4209 4210 /* check constraints */ 4211 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4212 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4213 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4214 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4215 } else { 4216 PetscScalar *data; 4217 Mat tmat; 4218 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4219 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4220 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4221 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4222 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4223 } 4224 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4225 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4226 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4227 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4228 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4229 if (!pcbddc->symmetric_primal) { 4230 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4231 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4232 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4233 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4234 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4235 } 4236 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4237 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4238 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4239 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4240 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4241 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4242 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4243 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4244 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4245 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4246 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4247 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4248 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4249 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4250 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4251 if (!pcbddc->symmetric_primal) { 4252 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4253 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4254 } 4255 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4256 } 4257 /* get back data */ 4258 *coarse_submat_vals_n = coarse_submat_vals; 4259 PetscFunctionReturn(0); 4260 } 4261 4262 #undef __FUNCT__ 4263 #define __FUNCT__ "MatGetSubMatrixUnsorted" 4264 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4265 { 4266 Mat *work_mat; 4267 IS isrow_s,iscol_s; 4268 PetscBool rsorted,csorted; 4269 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4270 PetscErrorCode ierr; 4271 4272 PetscFunctionBegin; 4273 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4274 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4275 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4276 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4277 4278 if (!rsorted) { 4279 const PetscInt *idxs; 4280 PetscInt *idxs_sorted,i; 4281 4282 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4283 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4284 for (i=0;i<rsize;i++) { 4285 idxs_perm_r[i] = i; 4286 } 4287 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4288 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4289 for (i=0;i<rsize;i++) { 4290 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4291 } 4292 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4293 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4294 } else { 4295 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4296 isrow_s = isrow; 4297 } 4298 4299 if (!csorted) { 4300 if (isrow == iscol) { 4301 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4302 iscol_s = isrow_s; 4303 } else { 4304 const PetscInt *idxs; 4305 PetscInt *idxs_sorted,i; 4306 4307 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4308 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4309 for (i=0;i<csize;i++) { 4310 idxs_perm_c[i] = i; 4311 } 4312 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4313 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4314 for (i=0;i<csize;i++) { 4315 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4316 } 4317 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4318 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4319 } 4320 } else { 4321 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4322 iscol_s = iscol; 4323 } 4324 4325 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4326 4327 if (!rsorted || !csorted) { 4328 Mat new_mat; 4329 IS is_perm_r,is_perm_c; 4330 4331 if (!rsorted) { 4332 PetscInt *idxs_r,i; 4333 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4334 for (i=0;i<rsize;i++) { 4335 idxs_r[idxs_perm_r[i]] = i; 4336 } 4337 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4338 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4339 } else { 4340 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4341 } 4342 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4343 4344 if (!csorted) { 4345 if (isrow_s == iscol_s) { 4346 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4347 is_perm_c = is_perm_r; 4348 } else { 4349 PetscInt *idxs_c,i; 4350 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4351 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4352 for (i=0;i<csize;i++) { 4353 idxs_c[idxs_perm_c[i]] = i; 4354 } 4355 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4356 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4357 } 4358 } else { 4359 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4360 } 4361 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4362 4363 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4364 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4365 work_mat[0] = new_mat; 4366 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4367 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4368 } 4369 4370 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4371 *B = work_mat[0]; 4372 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4373 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4374 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4375 PetscFunctionReturn(0); 4376 } 4377 4378 #undef __FUNCT__ 4379 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 4380 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4381 { 4382 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4383 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4384 Mat new_mat; 4385 IS is_local,is_global; 4386 PetscInt local_size; 4387 PetscBool isseqaij; 4388 PetscErrorCode ierr; 4389 4390 PetscFunctionBegin; 4391 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4392 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4393 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4394 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4395 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4396 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4397 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4398 4399 /* check */ 4400 if (pcbddc->dbg_flag) { 4401 Vec x,x_change; 4402 PetscReal error; 4403 4404 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4405 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4406 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4407 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4408 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4409 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4410 if (!pcbddc->change_interior) { 4411 const PetscScalar *x,*y,*v; 4412 PetscReal lerror = 0.; 4413 PetscInt i; 4414 4415 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4416 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4417 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4418 for (i=0;i<local_size;i++) 4419 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4420 lerror = PetscAbsScalar(x[i]-y[i]); 4421 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4422 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4423 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4424 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4425 if (error > PETSC_SMALL) { 4426 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4427 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4428 } else { 4429 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4430 } 4431 } 4432 } 4433 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4434 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4435 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4436 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4437 if (error > PETSC_SMALL) { 4438 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4439 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4440 } else { 4441 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4442 } 4443 } 4444 ierr = VecDestroy(&x);CHKERRQ(ierr); 4445 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4446 } 4447 4448 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4449 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4450 if (isseqaij) { 4451 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4452 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4453 } else { 4454 Mat work_mat; 4455 4456 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4457 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4458 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4459 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4460 } 4461 if (matis->A->symmetric_set) { 4462 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4463 #if !defined(PETSC_USE_COMPLEX) 4464 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4465 #endif 4466 } 4467 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4468 PetscFunctionReturn(0); 4469 } 4470 4471 #undef __FUNCT__ 4472 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 4473 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4474 { 4475 PC_IS* pcis = (PC_IS*)(pc->data); 4476 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4477 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4478 PetscInt *idx_R_local=NULL; 4479 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4480 PetscInt vbs,bs; 4481 PetscBT bitmask=NULL; 4482 PetscErrorCode ierr; 4483 4484 PetscFunctionBegin; 4485 /* 4486 No need to setup local scatters if 4487 - primal space is unchanged 4488 AND 4489 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4490 AND 4491 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4492 */ 4493 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4494 PetscFunctionReturn(0); 4495 } 4496 /* destroy old objects */ 4497 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4498 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4499 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4500 /* Set Non-overlapping dimensions */ 4501 n_B = pcis->n_B; 4502 n_D = pcis->n - n_B; 4503 n_vertices = pcbddc->n_vertices; 4504 4505 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4506 4507 /* create auxiliary bitmask and allocate workspace */ 4508 if (!sub_schurs || !sub_schurs->reuse_solver) { 4509 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4510 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4511 for (i=0;i<n_vertices;i++) { 4512 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4513 } 4514 4515 for (i=0, n_R=0; i<pcis->n; i++) { 4516 if (!PetscBTLookup(bitmask,i)) { 4517 idx_R_local[n_R++] = i; 4518 } 4519 } 4520 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4521 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4522 4523 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4524 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4525 } 4526 4527 /* Block code */ 4528 vbs = 1; 4529 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4530 if (bs>1 && !(n_vertices%bs)) { 4531 PetscBool is_blocked = PETSC_TRUE; 4532 PetscInt *vary; 4533 if (!sub_schurs || !sub_schurs->reuse_solver) { 4534 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4535 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4536 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4537 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4538 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4539 for (i=0; i<pcis->n/bs; i++) { 4540 if (vary[i]!=0 && vary[i]!=bs) { 4541 is_blocked = PETSC_FALSE; 4542 break; 4543 } 4544 } 4545 ierr = PetscFree(vary);CHKERRQ(ierr); 4546 } else { 4547 /* Verify directly the R set */ 4548 for (i=0; i<n_R/bs; i++) { 4549 PetscInt j,node=idx_R_local[bs*i]; 4550 for (j=1; j<bs; j++) { 4551 if (node != idx_R_local[bs*i+j]-j) { 4552 is_blocked = PETSC_FALSE; 4553 break; 4554 } 4555 } 4556 } 4557 } 4558 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4559 vbs = bs; 4560 for (i=0;i<n_R/vbs;i++) { 4561 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4562 } 4563 } 4564 } 4565 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4566 if (sub_schurs && sub_schurs->reuse_solver) { 4567 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4568 4569 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4570 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4571 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4572 reuse_solver->is_R = pcbddc->is_R_local; 4573 } else { 4574 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4575 } 4576 4577 /* print some info if requested */ 4578 if (pcbddc->dbg_flag) { 4579 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4580 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4581 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4582 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4583 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4584 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4585 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4586 } 4587 4588 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4589 if (!sub_schurs || !sub_schurs->reuse_solver) { 4590 IS is_aux1,is_aux2; 4591 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4592 4593 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4594 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4595 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4596 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4597 for (i=0; i<n_D; i++) { 4598 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4599 } 4600 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4601 for (i=0, j=0; i<n_R; i++) { 4602 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4603 aux_array1[j++] = i; 4604 } 4605 } 4606 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4607 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4608 for (i=0, j=0; i<n_B; i++) { 4609 if (!PetscBTLookup(bitmask,is_indices[i])) { 4610 aux_array2[j++] = i; 4611 } 4612 } 4613 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4614 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4615 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4616 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4617 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4618 4619 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4620 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4621 for (i=0, j=0; i<n_R; i++) { 4622 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4623 aux_array1[j++] = i; 4624 } 4625 } 4626 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4627 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4628 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4629 } 4630 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4631 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4632 } else { 4633 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4634 IS tis; 4635 PetscInt schur_size; 4636 4637 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4638 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4639 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4640 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4641 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4642 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4643 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4644 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4645 } 4646 } 4647 PetscFunctionReturn(0); 4648 } 4649 4650 4651 #undef __FUNCT__ 4652 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 4653 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4654 { 4655 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4656 PC_IS *pcis = (PC_IS*)pc->data; 4657 PC pc_temp; 4658 Mat A_RR; 4659 MatReuse reuse; 4660 PetscScalar m_one = -1.0; 4661 PetscReal value; 4662 PetscInt n_D,n_R; 4663 PetscBool check_corr[2],issbaij; 4664 PetscErrorCode ierr; 4665 /* prefixes stuff */ 4666 char dir_prefix[256],neu_prefix[256],str_level[16]; 4667 size_t len; 4668 4669 PetscFunctionBegin; 4670 4671 /* compute prefixes */ 4672 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4673 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4674 if (!pcbddc->current_level) { 4675 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4676 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4677 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4678 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4679 } else { 4680 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4681 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4682 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4683 len -= 15; /* remove "pc_bddc_coarse_" */ 4684 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4685 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4686 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4687 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4688 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4689 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4690 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4691 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4692 } 4693 4694 /* DIRICHLET PROBLEM */ 4695 if (dirichlet) { 4696 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4697 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4698 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4699 if (pcbddc->dbg_flag) { 4700 Mat A_IIn; 4701 4702 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4703 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4704 pcis->A_II = A_IIn; 4705 } 4706 } 4707 if (pcbddc->local_mat->symmetric_set) { 4708 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4709 } 4710 /* Matrix for Dirichlet problem is pcis->A_II */ 4711 n_D = pcis->n - pcis->n_B; 4712 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4713 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4714 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4715 /* default */ 4716 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4717 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4718 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4719 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4720 if (issbaij) { 4721 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4722 } else { 4723 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4724 } 4725 /* Allow user's customization */ 4726 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4727 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4728 } 4729 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4730 if (sub_schurs && sub_schurs->reuse_solver) { 4731 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4732 4733 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4734 } 4735 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4736 if (!n_D) { 4737 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4738 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4739 } 4740 /* Set Up KSP for Dirichlet problem of BDDC */ 4741 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4742 /* set ksp_D into pcis data */ 4743 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4744 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4745 pcis->ksp_D = pcbddc->ksp_D; 4746 } 4747 4748 /* NEUMANN PROBLEM */ 4749 A_RR = 0; 4750 if (neumann) { 4751 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4752 PetscInt ibs,mbs; 4753 PetscBool issbaij; 4754 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4755 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4756 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4757 if (pcbddc->ksp_R) { /* already created ksp */ 4758 PetscInt nn_R; 4759 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4760 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4761 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4762 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4763 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4764 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4765 reuse = MAT_INITIAL_MATRIX; 4766 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4767 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4768 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4769 reuse = MAT_INITIAL_MATRIX; 4770 } else { /* safe to reuse the matrix */ 4771 reuse = MAT_REUSE_MATRIX; 4772 } 4773 } 4774 /* last check */ 4775 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4776 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4777 reuse = MAT_INITIAL_MATRIX; 4778 } 4779 } else { /* first time, so we need to create the matrix */ 4780 reuse = MAT_INITIAL_MATRIX; 4781 } 4782 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4783 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4784 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4785 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4786 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4787 if (matis->A == pcbddc->local_mat) { 4788 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4789 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4790 } else { 4791 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4792 } 4793 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4794 if (matis->A == pcbddc->local_mat) { 4795 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4796 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4797 } else { 4798 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4799 } 4800 } 4801 /* extract A_RR */ 4802 if (sub_schurs && sub_schurs->reuse_solver) { 4803 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4804 4805 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4806 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4807 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4808 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4809 } else { 4810 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4811 } 4812 } else { 4813 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4814 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4815 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4816 } 4817 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4818 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4819 } 4820 if (pcbddc->local_mat->symmetric_set) { 4821 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4822 } 4823 if (!pcbddc->ksp_R) { /* create object if not present */ 4824 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4825 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4826 /* default */ 4827 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4828 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4829 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4830 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4831 if (issbaij) { 4832 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4833 } else { 4834 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4835 } 4836 /* Allow user's customization */ 4837 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4838 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4839 } 4840 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4841 if (!n_R) { 4842 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4843 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4844 } 4845 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4846 /* Reuse solver if it is present */ 4847 if (sub_schurs && sub_schurs->reuse_solver) { 4848 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4849 4850 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4851 } 4852 /* Set Up KSP for Neumann problem of BDDC */ 4853 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4854 } 4855 4856 if (pcbddc->dbg_flag) { 4857 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4858 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4859 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4860 } 4861 4862 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4863 check_corr[0] = check_corr[1] = PETSC_FALSE; 4864 if (pcbddc->NullSpace_corr[0]) { 4865 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4866 } 4867 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4868 check_corr[0] = PETSC_TRUE; 4869 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4870 } 4871 if (neumann && pcbddc->NullSpace_corr[2]) { 4872 check_corr[1] = PETSC_TRUE; 4873 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4874 } 4875 4876 /* check Dirichlet and Neumann solvers */ 4877 if (pcbddc->dbg_flag) { 4878 if (dirichlet) { /* Dirichlet */ 4879 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4880 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4881 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4882 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4883 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4884 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4885 if (check_corr[0]) { 4886 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4887 } 4888 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4889 } 4890 if (neumann) { /* Neumann */ 4891 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4892 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4893 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4894 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4895 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4896 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4897 if (check_corr[1]) { 4898 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4899 } 4900 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4901 } 4902 } 4903 /* free Neumann problem's matrix */ 4904 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4905 PetscFunctionReturn(0); 4906 } 4907 4908 #undef __FUNCT__ 4909 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 4910 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4911 { 4912 PetscErrorCode ierr; 4913 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4914 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4915 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4916 4917 PetscFunctionBegin; 4918 if (!reuse_solver) { 4919 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4920 } 4921 if (!pcbddc->switch_static) { 4922 if (applytranspose && pcbddc->local_auxmat1) { 4923 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4924 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4925 } 4926 if (!reuse_solver) { 4927 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4928 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4929 } else { 4930 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4931 4932 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4933 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4934 } 4935 } else { 4936 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4937 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4938 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4939 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4940 if (applytranspose && pcbddc->local_auxmat1) { 4941 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4942 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4943 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4944 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4945 } 4946 } 4947 if (!reuse_solver || pcbddc->switch_static) { 4948 if (applytranspose) { 4949 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4950 } else { 4951 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4952 } 4953 } else { 4954 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4955 4956 if (applytranspose) { 4957 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4958 } else { 4959 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4960 } 4961 } 4962 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4963 if (!pcbddc->switch_static) { 4964 if (!reuse_solver) { 4965 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4966 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4967 } else { 4968 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4969 4970 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4971 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4972 } 4973 if (!applytranspose && pcbddc->local_auxmat1) { 4974 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4975 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4976 } 4977 } else { 4978 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4979 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4980 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4981 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4982 if (!applytranspose && pcbddc->local_auxmat1) { 4983 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4984 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4985 } 4986 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4987 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4988 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4989 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4990 } 4991 PetscFunctionReturn(0); 4992 } 4993 4994 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 4995 #undef __FUNCT__ 4996 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 4997 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 4998 { 4999 PetscErrorCode ierr; 5000 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5001 PC_IS* pcis = (PC_IS*) (pc->data); 5002 const PetscScalar zero = 0.0; 5003 5004 PetscFunctionBegin; 5005 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5006 if (!pcbddc->benign_apply_coarse_only) { 5007 if (applytranspose) { 5008 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5009 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5010 } else { 5011 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5012 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5013 } 5014 } else { 5015 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5016 } 5017 5018 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5019 if (pcbddc->benign_n) { 5020 PetscScalar *array; 5021 PetscInt j; 5022 5023 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5024 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5025 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5026 } 5027 5028 /* start communications from local primal nodes to rhs of coarse solver */ 5029 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5030 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5031 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5032 5033 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5034 if (pcbddc->coarse_ksp) { 5035 Mat coarse_mat; 5036 Vec rhs,sol; 5037 MatNullSpace nullsp; 5038 PetscBool isbddc = PETSC_FALSE; 5039 5040 if (pcbddc->benign_have_null) { 5041 PC coarse_pc; 5042 5043 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5044 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5045 /* we need to propagate to coarser levels the need for a possible benign correction */ 5046 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5047 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5048 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5049 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5050 } 5051 } 5052 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5053 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5054 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5055 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5056 if (nullsp) { 5057 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5058 } 5059 if (applytranspose) { 5060 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5061 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5062 } else { 5063 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5064 PC coarse_pc; 5065 5066 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5067 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5068 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5069 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5070 } else { 5071 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5072 } 5073 } 5074 /* we don't need the benign correction at coarser levels anymore */ 5075 if (pcbddc->benign_have_null && isbddc) { 5076 PC coarse_pc; 5077 PC_BDDC* coarsepcbddc; 5078 5079 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5080 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5081 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5082 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5083 } 5084 if (nullsp) { 5085 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5086 } 5087 } 5088 5089 /* Local solution on R nodes */ 5090 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5091 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5092 } 5093 /* communications from coarse sol to local primal nodes */ 5094 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5095 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5096 5097 /* Sum contributions from the two levels */ 5098 if (!pcbddc->benign_apply_coarse_only) { 5099 if (applytranspose) { 5100 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5101 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5102 } else { 5103 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5104 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5105 } 5106 /* store p0 */ 5107 if (pcbddc->benign_n) { 5108 PetscScalar *array; 5109 PetscInt j; 5110 5111 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5112 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5113 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5114 } 5115 } else { /* expand the coarse solution */ 5116 if (applytranspose) { 5117 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5118 } else { 5119 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5120 } 5121 } 5122 PetscFunctionReturn(0); 5123 } 5124 5125 #undef __FUNCT__ 5126 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 5127 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5128 { 5129 PetscErrorCode ierr; 5130 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5131 PetscScalar *array; 5132 Vec from,to; 5133 5134 PetscFunctionBegin; 5135 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5136 from = pcbddc->coarse_vec; 5137 to = pcbddc->vec1_P; 5138 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5139 Vec tvec; 5140 5141 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5142 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5143 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5144 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5145 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5146 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5147 } 5148 } else { /* from local to global -> put data in coarse right hand side */ 5149 from = pcbddc->vec1_P; 5150 to = pcbddc->coarse_vec; 5151 } 5152 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5153 PetscFunctionReturn(0); 5154 } 5155 5156 #undef __FUNCT__ 5157 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 5158 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5159 { 5160 PetscErrorCode ierr; 5161 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5162 PetscScalar *array; 5163 Vec from,to; 5164 5165 PetscFunctionBegin; 5166 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5167 from = pcbddc->coarse_vec; 5168 to = pcbddc->vec1_P; 5169 } else { /* from local to global -> put data in coarse right hand side */ 5170 from = pcbddc->vec1_P; 5171 to = pcbddc->coarse_vec; 5172 } 5173 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5174 if (smode == SCATTER_FORWARD) { 5175 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5176 Vec tvec; 5177 5178 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5179 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5180 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5181 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5182 } 5183 } else { 5184 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5185 ierr = VecResetArray(from);CHKERRQ(ierr); 5186 } 5187 } 5188 PetscFunctionReturn(0); 5189 } 5190 5191 /* uncomment for testing purposes */ 5192 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5193 #undef __FUNCT__ 5194 #define __FUNCT__ "PCBDDCConstraintsSetUp" 5195 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5196 { 5197 PetscErrorCode ierr; 5198 PC_IS* pcis = (PC_IS*)(pc->data); 5199 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5200 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5201 /* one and zero */ 5202 PetscScalar one=1.0,zero=0.0; 5203 /* space to store constraints and their local indices */ 5204 PetscScalar *constraints_data; 5205 PetscInt *constraints_idxs,*constraints_idxs_B; 5206 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5207 PetscInt *constraints_n; 5208 /* iterators */ 5209 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5210 /* BLAS integers */ 5211 PetscBLASInt lwork,lierr; 5212 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5213 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5214 /* reuse */ 5215 PetscInt olocal_primal_size,olocal_primal_size_cc; 5216 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5217 /* change of basis */ 5218 PetscBool qr_needed; 5219 PetscBT change_basis,qr_needed_idx; 5220 /* auxiliary stuff */ 5221 PetscInt *nnz,*is_indices; 5222 PetscInt ncc; 5223 /* some quantities */ 5224 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5225 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5226 5227 PetscFunctionBegin; 5228 /* Destroy Mat objects computed previously */ 5229 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5230 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5231 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5232 /* save info on constraints from previous setup (if any) */ 5233 olocal_primal_size = pcbddc->local_primal_size; 5234 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5235 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5236 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5237 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5238 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5239 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5240 5241 if (!pcbddc->adaptive_selection) { 5242 IS ISForVertices,*ISForFaces,*ISForEdges; 5243 MatNullSpace nearnullsp; 5244 const Vec *nearnullvecs; 5245 Vec *localnearnullsp; 5246 PetscScalar *array; 5247 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5248 PetscBool nnsp_has_cnst; 5249 /* LAPACK working arrays for SVD or POD */ 5250 PetscBool skip_lapack,boolforchange; 5251 PetscScalar *work; 5252 PetscReal *singular_vals; 5253 #if defined(PETSC_USE_COMPLEX) 5254 PetscReal *rwork; 5255 #endif 5256 #if defined(PETSC_MISSING_LAPACK_GESVD) 5257 PetscScalar *temp_basis,*correlation_mat; 5258 #else 5259 PetscBLASInt dummy_int=1; 5260 PetscScalar dummy_scalar=1.; 5261 #endif 5262 5263 /* Get index sets for faces, edges and vertices from graph */ 5264 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5265 /* print some info */ 5266 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5267 PetscInt nv; 5268 5269 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5270 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5271 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5272 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5273 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5274 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5275 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5276 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5277 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5278 } 5279 5280 /* free unneeded index sets */ 5281 if (!pcbddc->use_vertices) { 5282 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5283 } 5284 if (!pcbddc->use_edges) { 5285 for (i=0;i<n_ISForEdges;i++) { 5286 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5287 } 5288 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5289 n_ISForEdges = 0; 5290 } 5291 if (!pcbddc->use_faces) { 5292 for (i=0;i<n_ISForFaces;i++) { 5293 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5294 } 5295 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5296 n_ISForFaces = 0; 5297 } 5298 5299 /* check if near null space is attached to global mat */ 5300 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5301 if (nearnullsp) { 5302 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5303 /* remove any stored info */ 5304 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5305 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5306 /* store information for BDDC solver reuse */ 5307 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5308 pcbddc->onearnullspace = nearnullsp; 5309 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5310 for (i=0;i<nnsp_size;i++) { 5311 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5312 } 5313 } else { /* if near null space is not provided BDDC uses constants by default */ 5314 nnsp_size = 0; 5315 nnsp_has_cnst = PETSC_TRUE; 5316 } 5317 /* get max number of constraints on a single cc */ 5318 max_constraints = nnsp_size; 5319 if (nnsp_has_cnst) max_constraints++; 5320 5321 /* 5322 Evaluate maximum storage size needed by the procedure 5323 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5324 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5325 There can be multiple constraints per connected component 5326 */ 5327 n_vertices = 0; 5328 if (ISForVertices) { 5329 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5330 } 5331 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5332 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5333 5334 total_counts = n_ISForFaces+n_ISForEdges; 5335 total_counts *= max_constraints; 5336 total_counts += n_vertices; 5337 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5338 5339 total_counts = 0; 5340 max_size_of_constraint = 0; 5341 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5342 IS used_is; 5343 if (i<n_ISForEdges) { 5344 used_is = ISForEdges[i]; 5345 } else { 5346 used_is = ISForFaces[i-n_ISForEdges]; 5347 } 5348 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5349 total_counts += j; 5350 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5351 } 5352 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5353 5354 /* get local part of global near null space vectors */ 5355 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5356 for (k=0;k<nnsp_size;k++) { 5357 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5358 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5359 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5360 } 5361 5362 /* whether or not to skip lapack calls */ 5363 skip_lapack = PETSC_TRUE; 5364 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5365 5366 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5367 if (!skip_lapack) { 5368 PetscScalar temp_work; 5369 5370 #if defined(PETSC_MISSING_LAPACK_GESVD) 5371 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5372 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5373 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5374 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5375 #if defined(PETSC_USE_COMPLEX) 5376 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5377 #endif 5378 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5379 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5380 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5381 lwork = -1; 5382 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5383 #if !defined(PETSC_USE_COMPLEX) 5384 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5385 #else 5386 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5387 #endif 5388 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5389 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5390 #else /* on missing GESVD */ 5391 /* SVD */ 5392 PetscInt max_n,min_n; 5393 max_n = max_size_of_constraint; 5394 min_n = max_constraints; 5395 if (max_size_of_constraint < max_constraints) { 5396 min_n = max_size_of_constraint; 5397 max_n = max_constraints; 5398 } 5399 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5400 #if defined(PETSC_USE_COMPLEX) 5401 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5402 #endif 5403 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5404 lwork = -1; 5405 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5406 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5407 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5408 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5409 #if !defined(PETSC_USE_COMPLEX) 5410 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5411 #else 5412 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5413 #endif 5414 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5415 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5416 #endif /* on missing GESVD */ 5417 /* Allocate optimal workspace */ 5418 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5419 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5420 } 5421 /* Now we can loop on constraining sets */ 5422 total_counts = 0; 5423 constraints_idxs_ptr[0] = 0; 5424 constraints_data_ptr[0] = 0; 5425 /* vertices */ 5426 if (n_vertices) { 5427 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5428 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5429 for (i=0;i<n_vertices;i++) { 5430 constraints_n[total_counts] = 1; 5431 constraints_data[total_counts] = 1.0; 5432 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5433 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5434 total_counts++; 5435 } 5436 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5437 n_vertices = total_counts; 5438 } 5439 5440 /* edges and faces */ 5441 total_counts_cc = total_counts; 5442 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5443 IS used_is; 5444 PetscBool idxs_copied = PETSC_FALSE; 5445 5446 if (ncc<n_ISForEdges) { 5447 used_is = ISForEdges[ncc]; 5448 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5449 } else { 5450 used_is = ISForFaces[ncc-n_ISForEdges]; 5451 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5452 } 5453 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5454 5455 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5456 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5457 /* change of basis should not be performed on local periodic nodes */ 5458 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5459 if (nnsp_has_cnst) { 5460 PetscScalar quad_value; 5461 5462 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5463 idxs_copied = PETSC_TRUE; 5464 5465 if (!pcbddc->use_nnsp_true) { 5466 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5467 } else { 5468 quad_value = 1.0; 5469 } 5470 for (j=0;j<size_of_constraint;j++) { 5471 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5472 } 5473 temp_constraints++; 5474 total_counts++; 5475 } 5476 for (k=0;k<nnsp_size;k++) { 5477 PetscReal real_value; 5478 PetscScalar *ptr_to_data; 5479 5480 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5481 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5482 for (j=0;j<size_of_constraint;j++) { 5483 ptr_to_data[j] = array[is_indices[j]]; 5484 } 5485 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5486 /* check if array is null on the connected component */ 5487 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5488 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5489 if (real_value > 0.0) { /* keep indices and values */ 5490 temp_constraints++; 5491 total_counts++; 5492 if (!idxs_copied) { 5493 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5494 idxs_copied = PETSC_TRUE; 5495 } 5496 } 5497 } 5498 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5499 valid_constraints = temp_constraints; 5500 if (!pcbddc->use_nnsp_true && temp_constraints) { 5501 if (temp_constraints == 1) { /* just normalize the constraint */ 5502 PetscScalar norm,*ptr_to_data; 5503 5504 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5505 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5506 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5507 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5508 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5509 } else { /* perform SVD */ 5510 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5511 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5512 5513 #if defined(PETSC_MISSING_LAPACK_GESVD) 5514 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5515 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5516 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5517 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5518 from that computed using LAPACKgesvd 5519 -> This is due to a different computation of eigenvectors in LAPACKheev 5520 -> The quality of the POD-computed basis will be the same */ 5521 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5522 /* Store upper triangular part of correlation matrix */ 5523 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5524 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5525 for (j=0;j<temp_constraints;j++) { 5526 for (k=0;k<j+1;k++) { 5527 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5528 } 5529 } 5530 /* compute eigenvalues and eigenvectors of correlation matrix */ 5531 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5532 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5533 #if !defined(PETSC_USE_COMPLEX) 5534 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5535 #else 5536 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5537 #endif 5538 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5539 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5540 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5541 j = 0; 5542 while (j < temp_constraints && singular_vals[j] < tol) j++; 5543 total_counts = total_counts-j; 5544 valid_constraints = temp_constraints-j; 5545 /* scale and copy POD basis into used quadrature memory */ 5546 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5547 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5548 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5549 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5550 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5551 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5552 if (j<temp_constraints) { 5553 PetscInt ii; 5554 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5555 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5556 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5557 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5558 for (k=0;k<temp_constraints-j;k++) { 5559 for (ii=0;ii<size_of_constraint;ii++) { 5560 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5561 } 5562 } 5563 } 5564 #else /* on missing GESVD */ 5565 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5566 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5567 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5568 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5569 #if !defined(PETSC_USE_COMPLEX) 5570 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5571 #else 5572 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5573 #endif 5574 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5575 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5576 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5577 k = temp_constraints; 5578 if (k > size_of_constraint) k = size_of_constraint; 5579 j = 0; 5580 while (j < k && singular_vals[k-j-1] < tol) j++; 5581 valid_constraints = k-j; 5582 total_counts = total_counts-temp_constraints+valid_constraints; 5583 #endif /* on missing GESVD */ 5584 } 5585 } 5586 /* update pointers information */ 5587 if (valid_constraints) { 5588 constraints_n[total_counts_cc] = valid_constraints; 5589 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5590 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5591 /* set change_of_basis flag */ 5592 if (boolforchange) { 5593 PetscBTSet(change_basis,total_counts_cc); 5594 } 5595 total_counts_cc++; 5596 } 5597 } 5598 /* free workspace */ 5599 if (!skip_lapack) { 5600 ierr = PetscFree(work);CHKERRQ(ierr); 5601 #if defined(PETSC_USE_COMPLEX) 5602 ierr = PetscFree(rwork);CHKERRQ(ierr); 5603 #endif 5604 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5605 #if defined(PETSC_MISSING_LAPACK_GESVD) 5606 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5607 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5608 #endif 5609 } 5610 for (k=0;k<nnsp_size;k++) { 5611 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5612 } 5613 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5614 /* free index sets of faces, edges and vertices */ 5615 for (i=0;i<n_ISForFaces;i++) { 5616 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5617 } 5618 if (n_ISForFaces) { 5619 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5620 } 5621 for (i=0;i<n_ISForEdges;i++) { 5622 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5623 } 5624 if (n_ISForEdges) { 5625 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5626 } 5627 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5628 } else { 5629 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5630 5631 total_counts = 0; 5632 n_vertices = 0; 5633 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5634 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5635 } 5636 max_constraints = 0; 5637 total_counts_cc = 0; 5638 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5639 total_counts += pcbddc->adaptive_constraints_n[i]; 5640 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5641 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5642 } 5643 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5644 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5645 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5646 constraints_data = pcbddc->adaptive_constraints_data; 5647 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5648 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5649 total_counts_cc = 0; 5650 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5651 if (pcbddc->adaptive_constraints_n[i]) { 5652 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5653 } 5654 } 5655 #if 0 5656 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5657 for (i=0;i<total_counts_cc;i++) { 5658 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5659 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5660 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5661 printf(" %d",constraints_idxs[j]); 5662 } 5663 printf("\n"); 5664 printf("number of cc: %d\n",constraints_n[i]); 5665 } 5666 for (i=0;i<n_vertices;i++) { 5667 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5668 } 5669 for (i=0;i<sub_schurs->n_subs;i++) { 5670 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5671 } 5672 #endif 5673 5674 max_size_of_constraint = 0; 5675 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5676 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5677 /* Change of basis */ 5678 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5679 if (pcbddc->use_change_of_basis) { 5680 for (i=0;i<sub_schurs->n_subs;i++) { 5681 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5682 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5683 } 5684 } 5685 } 5686 } 5687 pcbddc->local_primal_size = total_counts; 5688 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5689 5690 /* map constraints_idxs in boundary numbering */ 5691 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5692 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5693 5694 /* Create constraint matrix */ 5695 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5696 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5697 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5698 5699 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5700 /* determine if a QR strategy is needed for change of basis */ 5701 qr_needed = PETSC_FALSE; 5702 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5703 total_primal_vertices=0; 5704 pcbddc->local_primal_size_cc = 0; 5705 for (i=0;i<total_counts_cc;i++) { 5706 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5707 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5708 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5709 pcbddc->local_primal_size_cc += 1; 5710 } else if (PetscBTLookup(change_basis,i)) { 5711 for (k=0;k<constraints_n[i];k++) { 5712 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5713 } 5714 pcbddc->local_primal_size_cc += constraints_n[i]; 5715 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5716 PetscBTSet(qr_needed_idx,i); 5717 qr_needed = PETSC_TRUE; 5718 } 5719 } else { 5720 pcbddc->local_primal_size_cc += 1; 5721 } 5722 } 5723 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5724 pcbddc->n_vertices = total_primal_vertices; 5725 /* permute indices in order to have a sorted set of vertices */ 5726 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5727 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5728 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5729 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5730 5731 /* nonzero structure of constraint matrix */ 5732 /* and get reference dof for local constraints */ 5733 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5734 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5735 5736 j = total_primal_vertices; 5737 total_counts = total_primal_vertices; 5738 cum = total_primal_vertices; 5739 for (i=n_vertices;i<total_counts_cc;i++) { 5740 if (!PetscBTLookup(change_basis,i)) { 5741 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5742 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5743 cum++; 5744 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5745 for (k=0;k<constraints_n[i];k++) { 5746 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5747 nnz[j+k] = size_of_constraint; 5748 } 5749 j += constraints_n[i]; 5750 } 5751 } 5752 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5753 ierr = PetscFree(nnz);CHKERRQ(ierr); 5754 5755 /* set values in constraint matrix */ 5756 for (i=0;i<total_primal_vertices;i++) { 5757 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5758 } 5759 total_counts = total_primal_vertices; 5760 for (i=n_vertices;i<total_counts_cc;i++) { 5761 if (!PetscBTLookup(change_basis,i)) { 5762 PetscInt *cols; 5763 5764 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5765 cols = constraints_idxs+constraints_idxs_ptr[i]; 5766 for (k=0;k<constraints_n[i];k++) { 5767 PetscInt row = total_counts+k; 5768 PetscScalar *vals; 5769 5770 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5771 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5772 } 5773 total_counts += constraints_n[i]; 5774 } 5775 } 5776 /* assembling */ 5777 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5778 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5779 5780 /* 5781 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5782 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5783 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5784 */ 5785 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5786 if (pcbddc->use_change_of_basis) { 5787 /* dual and primal dofs on a single cc */ 5788 PetscInt dual_dofs,primal_dofs; 5789 /* working stuff for GEQRF */ 5790 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5791 PetscBLASInt lqr_work; 5792 /* working stuff for UNGQR */ 5793 PetscScalar *gqr_work,lgqr_work_t; 5794 PetscBLASInt lgqr_work; 5795 /* working stuff for TRTRS */ 5796 PetscScalar *trs_rhs; 5797 PetscBLASInt Blas_NRHS; 5798 /* pointers for values insertion into change of basis matrix */ 5799 PetscInt *start_rows,*start_cols; 5800 PetscScalar *start_vals; 5801 /* working stuff for values insertion */ 5802 PetscBT is_primal; 5803 PetscInt *aux_primal_numbering_B; 5804 /* matrix sizes */ 5805 PetscInt global_size,local_size; 5806 /* temporary change of basis */ 5807 Mat localChangeOfBasisMatrix; 5808 /* extra space for debugging */ 5809 PetscScalar *dbg_work; 5810 5811 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5812 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5813 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5814 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5815 /* nonzeros for local mat */ 5816 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5817 if (!pcbddc->benign_change || pcbddc->fake_change) { 5818 for (i=0;i<pcis->n;i++) nnz[i]=1; 5819 } else { 5820 const PetscInt *ii; 5821 PetscInt n; 5822 PetscBool flg_row; 5823 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5824 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5825 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5826 } 5827 for (i=n_vertices;i<total_counts_cc;i++) { 5828 if (PetscBTLookup(change_basis,i)) { 5829 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5830 if (PetscBTLookup(qr_needed_idx,i)) { 5831 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5832 } else { 5833 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5834 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5835 } 5836 } 5837 } 5838 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5839 ierr = PetscFree(nnz);CHKERRQ(ierr); 5840 /* Set interior change in the matrix */ 5841 if (!pcbddc->benign_change || pcbddc->fake_change) { 5842 for (i=0;i<pcis->n;i++) { 5843 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5844 } 5845 } else { 5846 const PetscInt *ii,*jj; 5847 PetscScalar *aa; 5848 PetscInt n; 5849 PetscBool flg_row; 5850 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5851 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5852 for (i=0;i<n;i++) { 5853 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5854 } 5855 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5856 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5857 } 5858 5859 if (pcbddc->dbg_flag) { 5860 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5861 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5862 } 5863 5864 5865 /* Now we loop on the constraints which need a change of basis */ 5866 /* 5867 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5868 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5869 5870 Basic blocks of change of basis matrix T computed by 5871 5872 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5873 5874 | 1 0 ... 0 s_1/S | 5875 | 0 1 ... 0 s_2/S | 5876 | ... | 5877 | 0 ... 1 s_{n-1}/S | 5878 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5879 5880 with S = \sum_{i=1}^n s_i^2 5881 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5882 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5883 5884 - QR decomposition of constraints otherwise 5885 */ 5886 if (qr_needed) { 5887 /* space to store Q */ 5888 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5889 /* array to store scaling factors for reflectors */ 5890 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5891 /* first we issue queries for optimal work */ 5892 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5893 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5894 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5895 lqr_work = -1; 5896 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5897 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5898 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5899 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5900 lgqr_work = -1; 5901 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5902 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5903 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5904 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5905 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5906 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5907 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5908 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5909 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5910 /* array to store rhs and solution of triangular solver */ 5911 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5912 /* allocating workspace for check */ 5913 if (pcbddc->dbg_flag) { 5914 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5915 } 5916 } 5917 /* array to store whether a node is primal or not */ 5918 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5919 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5920 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5921 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5922 for (i=0;i<total_primal_vertices;i++) { 5923 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5924 } 5925 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5926 5927 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5928 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5929 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5930 if (PetscBTLookup(change_basis,total_counts)) { 5931 /* get constraint info */ 5932 primal_dofs = constraints_n[total_counts]; 5933 dual_dofs = size_of_constraint-primal_dofs; 5934 5935 if (pcbddc->dbg_flag) { 5936 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5937 } 5938 5939 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5940 5941 /* copy quadrature constraints for change of basis check */ 5942 if (pcbddc->dbg_flag) { 5943 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5944 } 5945 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5946 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5947 5948 /* compute QR decomposition of constraints */ 5949 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5950 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5951 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5952 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5953 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5954 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5955 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5956 5957 /* explictly compute R^-T */ 5958 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5959 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5960 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5961 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5962 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5963 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5964 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5965 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5966 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5967 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5968 5969 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5970 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5971 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5972 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5973 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5974 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5975 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 5976 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 5977 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5978 5979 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 5980 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 5981 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 5982 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5983 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5984 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5985 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5986 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5987 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5988 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5989 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 5990 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5991 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5992 5993 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 5994 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 5995 /* insert cols for primal dofs */ 5996 for (j=0;j<primal_dofs;j++) { 5997 start_vals = &qr_basis[j*size_of_constraint]; 5998 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5999 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6000 } 6001 /* insert cols for dual dofs */ 6002 for (j=0,k=0;j<dual_dofs;k++) { 6003 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6004 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6005 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6006 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6007 j++; 6008 } 6009 } 6010 6011 /* check change of basis */ 6012 if (pcbddc->dbg_flag) { 6013 PetscInt ii,jj; 6014 PetscBool valid_qr=PETSC_TRUE; 6015 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6016 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6017 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6018 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6019 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6020 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6021 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6022 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6023 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6024 for (jj=0;jj<size_of_constraint;jj++) { 6025 for (ii=0;ii<primal_dofs;ii++) { 6026 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6027 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 6028 } 6029 } 6030 if (!valid_qr) { 6031 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6032 for (jj=0;jj<size_of_constraint;jj++) { 6033 for (ii=0;ii<primal_dofs;ii++) { 6034 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6035 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6036 } 6037 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 6038 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6039 } 6040 } 6041 } 6042 } else { 6043 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6044 } 6045 } 6046 } else { /* simple transformation block */ 6047 PetscInt row,col; 6048 PetscScalar val,norm; 6049 6050 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6051 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6052 for (j=0;j<size_of_constraint;j++) { 6053 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6054 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6055 if (!PetscBTLookup(is_primal,row_B)) { 6056 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6057 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6058 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6059 } else { 6060 for (k=0;k<size_of_constraint;k++) { 6061 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6062 if (row != col) { 6063 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6064 } else { 6065 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6066 } 6067 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6068 } 6069 } 6070 } 6071 if (pcbddc->dbg_flag) { 6072 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6073 } 6074 } 6075 } else { 6076 if (pcbddc->dbg_flag) { 6077 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6078 } 6079 } 6080 } 6081 6082 /* free workspace */ 6083 if (qr_needed) { 6084 if (pcbddc->dbg_flag) { 6085 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6086 } 6087 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6088 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6089 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6090 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6091 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6092 } 6093 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6094 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6095 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6096 6097 /* assembling of global change of variable */ 6098 if (!pcbddc->fake_change) { 6099 Mat tmat; 6100 PetscInt bs; 6101 6102 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6103 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6104 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6105 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6106 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6107 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6108 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6109 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6110 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6111 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6112 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6113 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6114 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6115 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6116 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6117 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6118 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6119 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6120 6121 /* check */ 6122 if (pcbddc->dbg_flag) { 6123 PetscReal error; 6124 Vec x,x_change; 6125 6126 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6127 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6128 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6129 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6130 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6131 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6132 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6133 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6134 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6135 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6136 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6137 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6138 if (error > PETSC_SMALL) { 6139 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6140 } 6141 ierr = VecDestroy(&x);CHKERRQ(ierr); 6142 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6143 } 6144 /* adapt sub_schurs computed (if any) */ 6145 if (pcbddc->use_deluxe_scaling) { 6146 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6147 6148 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 6149 if (sub_schurs && sub_schurs->S_Ej_all) { 6150 Mat S_new,tmat; 6151 IS is_all_N,is_V_Sall = NULL; 6152 6153 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6154 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6155 if (pcbddc->deluxe_zerorows) { 6156 ISLocalToGlobalMapping NtoSall; 6157 IS is_V; 6158 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6159 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6160 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6161 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6162 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6163 } 6164 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6165 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6166 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6167 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6168 if (pcbddc->deluxe_zerorows) { 6169 const PetscScalar *array; 6170 const PetscInt *idxs_V,*idxs_all; 6171 PetscInt i,n_V; 6172 6173 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6174 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6175 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6176 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6177 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6178 for (i=0;i<n_V;i++) { 6179 PetscScalar val; 6180 PetscInt idx; 6181 6182 idx = idxs_V[i]; 6183 val = array[idxs_all[idxs_V[i]]]; 6184 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6185 } 6186 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6187 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6188 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6189 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6190 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6191 } 6192 sub_schurs->S_Ej_all = S_new; 6193 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6194 if (sub_schurs->sum_S_Ej_all) { 6195 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6196 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6197 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6198 if (pcbddc->deluxe_zerorows) { 6199 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6200 } 6201 sub_schurs->sum_S_Ej_all = S_new; 6202 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6203 } 6204 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6205 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6206 } 6207 /* destroy any change of basis context in sub_schurs */ 6208 if (sub_schurs && sub_schurs->change) { 6209 PetscInt i; 6210 6211 for (i=0;i<sub_schurs->n_subs;i++) { 6212 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6213 } 6214 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6215 } 6216 } 6217 if (pcbddc->switch_static) { /* need to save the local change */ 6218 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6219 } else { 6220 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6221 } 6222 /* determine if any process has changed the pressures locally */ 6223 pcbddc->change_interior = pcbddc->benign_have_null; 6224 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6225 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6226 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6227 pcbddc->use_qr_single = qr_needed; 6228 } 6229 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6230 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6231 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6232 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6233 } else { 6234 Mat benign_global = NULL; 6235 if (pcbddc->benign_have_null) { 6236 Mat tmat; 6237 6238 pcbddc->change_interior = PETSC_TRUE; 6239 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6240 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6241 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6242 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6243 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6244 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6245 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6246 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6247 if (pcbddc->benign_change) { 6248 Mat M; 6249 6250 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6251 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6252 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6253 ierr = MatDestroy(&M);CHKERRQ(ierr); 6254 } else { 6255 Mat eye; 6256 PetscScalar *array; 6257 6258 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6259 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6260 for (i=0;i<pcis->n;i++) { 6261 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6262 } 6263 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6264 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6265 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6266 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6267 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6268 } 6269 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6270 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6271 } 6272 if (pcbddc->user_ChangeOfBasisMatrix) { 6273 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6274 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6275 } else if (pcbddc->benign_have_null) { 6276 pcbddc->ChangeOfBasisMatrix = benign_global; 6277 } 6278 } 6279 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6280 IS is_global; 6281 const PetscInt *gidxs; 6282 6283 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6284 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6285 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6286 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6287 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6288 } 6289 } 6290 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6291 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6292 } 6293 6294 if (!pcbddc->fake_change) { 6295 /* add pressure dofs to set of primal nodes for numbering purposes */ 6296 for (i=0;i<pcbddc->benign_n;i++) { 6297 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6298 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6299 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6300 pcbddc->local_primal_size_cc++; 6301 pcbddc->local_primal_size++; 6302 } 6303 6304 /* check if a new primal space has been introduced (also take into account benign trick) */ 6305 pcbddc->new_primal_space_local = PETSC_TRUE; 6306 if (olocal_primal_size == pcbddc->local_primal_size) { 6307 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6308 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6309 if (!pcbddc->new_primal_space_local) { 6310 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6311 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6312 } 6313 } 6314 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6315 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6316 } 6317 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6318 6319 /* flush dbg viewer */ 6320 if (pcbddc->dbg_flag) { 6321 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6322 } 6323 6324 /* free workspace */ 6325 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6326 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6327 if (!pcbddc->adaptive_selection) { 6328 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6329 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6330 } else { 6331 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6332 pcbddc->adaptive_constraints_idxs_ptr, 6333 pcbddc->adaptive_constraints_data_ptr, 6334 pcbddc->adaptive_constraints_idxs, 6335 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6336 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6337 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6338 } 6339 PetscFunctionReturn(0); 6340 } 6341 6342 #undef __FUNCT__ 6343 #define __FUNCT__ "PCBDDCAnalyzeInterface" 6344 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6345 { 6346 ISLocalToGlobalMapping map; 6347 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6348 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6349 PetscInt i,N; 6350 PetscBool rcsr = PETSC_FALSE; 6351 PetscErrorCode ierr; 6352 6353 PetscFunctionBegin; 6354 if (pcbddc->recompute_topography) { 6355 pcbddc->graphanalyzed = PETSC_FALSE; 6356 /* Reset previously computed graph */ 6357 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6358 /* Init local Graph struct */ 6359 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6360 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6361 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6362 6363 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6364 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6365 } 6366 /* Check validity of the csr graph passed in by the user */ 6367 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6368 6369 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6370 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6371 PetscInt *xadj,*adjncy; 6372 PetscInt nvtxs; 6373 PetscBool flg_row=PETSC_FALSE; 6374 6375 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6376 if (flg_row) { 6377 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6378 pcbddc->computed_rowadj = PETSC_TRUE; 6379 } 6380 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6381 rcsr = PETSC_TRUE; 6382 } 6383 if (pcbddc->dbg_flag) { 6384 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6385 } 6386 6387 /* Setup of Graph */ 6388 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6389 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6390 6391 /* attach info on disconnected subdomains if present */ 6392 if (pcbddc->n_local_subs) { 6393 PetscInt *local_subs; 6394 6395 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6396 for (i=0;i<pcbddc->n_local_subs;i++) { 6397 const PetscInt *idxs; 6398 PetscInt nl,j; 6399 6400 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6401 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6402 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6403 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6404 } 6405 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6406 pcbddc->mat_graph->local_subs = local_subs; 6407 } 6408 } 6409 6410 if (!pcbddc->graphanalyzed) { 6411 /* Graph's connected components analysis */ 6412 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6413 pcbddc->graphanalyzed = PETSC_TRUE; 6414 } 6415 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6416 PetscFunctionReturn(0); 6417 } 6418 6419 #undef __FUNCT__ 6420 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 6421 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6422 { 6423 PetscInt i,j; 6424 PetscScalar *alphas; 6425 PetscErrorCode ierr; 6426 6427 PetscFunctionBegin; 6428 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6429 for (i=0;i<n;i++) { 6430 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6431 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6432 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6433 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6434 } 6435 ierr = PetscFree(alphas);CHKERRQ(ierr); 6436 PetscFunctionReturn(0); 6437 } 6438 6439 #undef __FUNCT__ 6440 #define __FUNCT__ "PCBDDCMatISGetSubassemblingPattern" 6441 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6442 { 6443 Mat A; 6444 PetscInt n_neighs,*neighs,*n_shared,**shared; 6445 PetscMPIInt size,rank,color; 6446 PetscInt *xadj,*adjncy; 6447 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6448 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 6449 PetscInt void_procs,*procs_candidates = NULL; 6450 PetscInt xadj_count,*count; 6451 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6452 PetscSubcomm psubcomm; 6453 MPI_Comm subcomm; 6454 PetscErrorCode ierr; 6455 6456 PetscFunctionBegin; 6457 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6458 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6459 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6460 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6461 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6462 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6463 6464 if (have_void) *have_void = PETSC_FALSE; 6465 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6466 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6467 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6468 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6469 im_active = !!n; 6470 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6471 void_procs = size - active_procs; 6472 /* get ranks of of non-active processes in mat communicator */ 6473 if (void_procs) { 6474 PetscInt ncand; 6475 6476 if (have_void) *have_void = PETSC_TRUE; 6477 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6478 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6479 for (i=0,ncand=0;i<size;i++) { 6480 if (!procs_candidates[i]) { 6481 procs_candidates[ncand++] = i; 6482 } 6483 } 6484 /* force n_subdomains to be not greater that the number of non-active processes */ 6485 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6486 } 6487 6488 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 6489 number of subdomains requested 1 -> send to master or first candidate in voids */ 6490 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 6491 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 6492 PetscInt issize,isidx,dest; 6493 if (*n_subdomains == 1) dest = 0; 6494 else dest = rank; 6495 if (im_active) { 6496 issize = 1; 6497 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6498 isidx = procs_candidates[dest]; 6499 } else { 6500 isidx = dest; 6501 } 6502 } else { 6503 issize = 0; 6504 isidx = -1; 6505 } 6506 if (*n_subdomains != 1) *n_subdomains = active_procs; 6507 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6508 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6509 PetscFunctionReturn(0); 6510 } 6511 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6512 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6513 threshold = PetscMax(threshold,2); 6514 6515 /* Get info on mapping */ 6516 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6517 6518 /* build local CSR graph of subdomains' connectivity */ 6519 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6520 xadj[0] = 0; 6521 xadj[1] = PetscMax(n_neighs-1,0); 6522 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6523 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6524 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 6525 for (i=1;i<n_neighs;i++) 6526 for (j=0;j<n_shared[i];j++) 6527 count[shared[i][j]] += 1; 6528 6529 xadj_count = 0; 6530 for (i=1;i<n_neighs;i++) { 6531 for (j=0;j<n_shared[i];j++) { 6532 if (count[shared[i][j]] < threshold) { 6533 adjncy[xadj_count] = neighs[i]; 6534 adjncy_wgt[xadj_count] = n_shared[i]; 6535 xadj_count++; 6536 break; 6537 } 6538 } 6539 } 6540 xadj[1] = xadj_count; 6541 ierr = PetscFree(count);CHKERRQ(ierr); 6542 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6543 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6544 6545 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6546 6547 /* Restrict work on active processes only */ 6548 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6549 if (void_procs) { 6550 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6551 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6552 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6553 subcomm = PetscSubcommChild(psubcomm); 6554 } else { 6555 psubcomm = NULL; 6556 subcomm = PetscObjectComm((PetscObject)mat); 6557 } 6558 6559 v_wgt = NULL; 6560 if (!color) { 6561 ierr = PetscFree(xadj);CHKERRQ(ierr); 6562 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6563 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6564 } else { 6565 Mat subdomain_adj; 6566 IS new_ranks,new_ranks_contig; 6567 MatPartitioning partitioner; 6568 PetscInt rstart=0,rend=0; 6569 PetscInt *is_indices,*oldranks; 6570 PetscMPIInt size; 6571 PetscBool aggregate; 6572 6573 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6574 if (void_procs) { 6575 PetscInt prank = rank; 6576 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6577 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6578 for (i=0;i<xadj[1];i++) { 6579 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6580 } 6581 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6582 } else { 6583 oldranks = NULL; 6584 } 6585 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6586 if (aggregate) { /* TODO: all this part could be made more efficient */ 6587 PetscInt lrows,row,ncols,*cols; 6588 PetscMPIInt nrank; 6589 PetscScalar *vals; 6590 6591 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6592 lrows = 0; 6593 if (nrank<redprocs) { 6594 lrows = size/redprocs; 6595 if (nrank<size%redprocs) lrows++; 6596 } 6597 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6598 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6599 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6600 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6601 row = nrank; 6602 ncols = xadj[1]-xadj[0]; 6603 cols = adjncy; 6604 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6605 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6606 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6607 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6608 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6609 ierr = PetscFree(xadj);CHKERRQ(ierr); 6610 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6611 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6612 ierr = PetscFree(vals);CHKERRQ(ierr); 6613 if (use_vwgt) { 6614 Vec v; 6615 const PetscScalar *array; 6616 PetscInt nl; 6617 6618 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6619 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 6620 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6621 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6622 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6623 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6624 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6625 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6626 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6627 ierr = VecDestroy(&v);CHKERRQ(ierr); 6628 } 6629 } else { 6630 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6631 if (use_vwgt) { 6632 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6633 v_wgt[0] = n; 6634 } 6635 } 6636 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6637 6638 /* Partition */ 6639 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6640 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6641 if (v_wgt) { 6642 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6643 } 6644 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6645 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6646 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6647 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6648 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6649 6650 /* renumber new_ranks to avoid "holes" in new set of processors */ 6651 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6652 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6653 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6654 if (!aggregate) { 6655 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6656 #if defined(PETSC_USE_DEBUG) 6657 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6658 #endif 6659 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6660 } else if (oldranks) { 6661 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6662 } else { 6663 ranks_send_to_idx[0] = is_indices[0]; 6664 } 6665 } else { 6666 PetscInt idxs[1]; 6667 PetscMPIInt tag; 6668 MPI_Request *reqs; 6669 6670 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6671 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6672 for (i=rstart;i<rend;i++) { 6673 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6674 } 6675 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6676 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6677 ierr = PetscFree(reqs);CHKERRQ(ierr); 6678 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6679 #if defined(PETSC_USE_DEBUG) 6680 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6681 #endif 6682 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6683 } else if (oldranks) { 6684 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6685 } else { 6686 ranks_send_to_idx[0] = idxs[0]; 6687 } 6688 } 6689 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6690 /* clean up */ 6691 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6692 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6693 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6694 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6695 } 6696 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6697 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6698 6699 /* assemble parallel IS for sends */ 6700 i = 1; 6701 if (!color) i=0; 6702 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6703 PetscFunctionReturn(0); 6704 } 6705 6706 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6707 6708 #undef __FUNCT__ 6709 #define __FUNCT__ "PCBDDCMatISSubassemble" 6710 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6711 { 6712 Mat local_mat; 6713 IS is_sends_internal; 6714 PetscInt rows,cols,new_local_rows; 6715 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6716 PetscBool ismatis,isdense,newisdense,destroy_mat; 6717 ISLocalToGlobalMapping l2gmap; 6718 PetscInt* l2gmap_indices; 6719 const PetscInt* is_indices; 6720 MatType new_local_type; 6721 /* buffers */ 6722 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6723 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6724 PetscInt *recv_buffer_idxs_local; 6725 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6726 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6727 /* MPI */ 6728 MPI_Comm comm,comm_n; 6729 PetscSubcomm subcomm; 6730 PetscMPIInt n_sends,n_recvs,commsize; 6731 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6732 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6733 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6734 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6735 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6736 PetscErrorCode ierr; 6737 6738 PetscFunctionBegin; 6739 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6740 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6741 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6742 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6743 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6744 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6745 PetscValidLogicalCollectiveBool(mat,reuse,6); 6746 PetscValidLogicalCollectiveInt(mat,nis,8); 6747 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6748 if (nvecs) { 6749 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6750 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6751 } 6752 /* further checks */ 6753 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6754 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6755 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6756 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6757 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6758 if (reuse && *mat_n) { 6759 PetscInt mrows,mcols,mnrows,mncols; 6760 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6761 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6762 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6763 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6764 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6765 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6766 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6767 } 6768 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6769 PetscValidLogicalCollectiveInt(mat,bs,0); 6770 6771 /* prepare IS for sending if not provided */ 6772 if (!is_sends) { 6773 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6774 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6775 } else { 6776 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6777 is_sends_internal = is_sends; 6778 } 6779 6780 /* get comm */ 6781 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6782 6783 /* compute number of sends */ 6784 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6785 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6786 6787 /* compute number of receives */ 6788 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6789 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6790 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6791 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6792 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6793 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6794 ierr = PetscFree(iflags);CHKERRQ(ierr); 6795 6796 /* restrict comm if requested */ 6797 subcomm = 0; 6798 destroy_mat = PETSC_FALSE; 6799 if (restrict_comm) { 6800 PetscMPIInt color,subcommsize; 6801 6802 color = 0; 6803 if (restrict_full) { 6804 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6805 } else { 6806 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6807 } 6808 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6809 subcommsize = commsize - subcommsize; 6810 /* check if reuse has been requested */ 6811 if (reuse) { 6812 if (*mat_n) { 6813 PetscMPIInt subcommsize2; 6814 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6815 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6816 comm_n = PetscObjectComm((PetscObject)*mat_n); 6817 } else { 6818 comm_n = PETSC_COMM_SELF; 6819 } 6820 } else { /* MAT_INITIAL_MATRIX */ 6821 PetscMPIInt rank; 6822 6823 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6824 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6825 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6826 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6827 comm_n = PetscSubcommChild(subcomm); 6828 } 6829 /* flag to destroy *mat_n if not significative */ 6830 if (color) destroy_mat = PETSC_TRUE; 6831 } else { 6832 comm_n = comm; 6833 } 6834 6835 /* prepare send/receive buffers */ 6836 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6837 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6838 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6839 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6840 if (nis) { 6841 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6842 } 6843 6844 /* Get data from local matrices */ 6845 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6846 /* TODO: See below some guidelines on how to prepare the local buffers */ 6847 /* 6848 send_buffer_vals should contain the raw values of the local matrix 6849 send_buffer_idxs should contain: 6850 - MatType_PRIVATE type 6851 - PetscInt size_of_l2gmap 6852 - PetscInt global_row_indices[size_of_l2gmap] 6853 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6854 */ 6855 else { 6856 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6857 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6858 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6859 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6860 send_buffer_idxs[1] = i; 6861 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6862 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6863 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6864 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6865 for (i=0;i<n_sends;i++) { 6866 ilengths_vals[is_indices[i]] = len*len; 6867 ilengths_idxs[is_indices[i]] = len+2; 6868 } 6869 } 6870 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6871 /* additional is (if any) */ 6872 if (nis) { 6873 PetscMPIInt psum; 6874 PetscInt j; 6875 for (j=0,psum=0;j<nis;j++) { 6876 PetscInt plen; 6877 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6878 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6879 psum += len+1; /* indices + lenght */ 6880 } 6881 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6882 for (j=0,psum=0;j<nis;j++) { 6883 PetscInt plen; 6884 const PetscInt *is_array_idxs; 6885 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6886 send_buffer_idxs_is[psum] = plen; 6887 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6888 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6889 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6890 psum += plen+1; /* indices + lenght */ 6891 } 6892 for (i=0;i<n_sends;i++) { 6893 ilengths_idxs_is[is_indices[i]] = psum; 6894 } 6895 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6896 } 6897 6898 buf_size_idxs = 0; 6899 buf_size_vals = 0; 6900 buf_size_idxs_is = 0; 6901 buf_size_vecs = 0; 6902 for (i=0;i<n_recvs;i++) { 6903 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6904 buf_size_vals += (PetscInt)olengths_vals[i]; 6905 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6906 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6907 } 6908 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6909 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6910 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6911 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6912 6913 /* get new tags for clean communications */ 6914 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6915 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6916 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6917 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6918 6919 /* allocate for requests */ 6920 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6921 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6922 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6923 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6924 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6925 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6926 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6927 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6928 6929 /* communications */ 6930 ptr_idxs = recv_buffer_idxs; 6931 ptr_vals = recv_buffer_vals; 6932 ptr_idxs_is = recv_buffer_idxs_is; 6933 ptr_vecs = recv_buffer_vecs; 6934 for (i=0;i<n_recvs;i++) { 6935 source_dest = onodes[i]; 6936 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6937 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6938 ptr_idxs += olengths_idxs[i]; 6939 ptr_vals += olengths_vals[i]; 6940 if (nis) { 6941 source_dest = onodes_is[i]; 6942 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6943 ptr_idxs_is += olengths_idxs_is[i]; 6944 } 6945 if (nvecs) { 6946 source_dest = onodes[i]; 6947 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6948 ptr_vecs += olengths_idxs[i]-2; 6949 } 6950 } 6951 for (i=0;i<n_sends;i++) { 6952 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6953 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6954 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6955 if (nis) { 6956 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6957 } 6958 if (nvecs) { 6959 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6960 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6961 } 6962 } 6963 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6964 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6965 6966 /* assemble new l2g map */ 6967 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6968 ptr_idxs = recv_buffer_idxs; 6969 new_local_rows = 0; 6970 for (i=0;i<n_recvs;i++) { 6971 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6972 ptr_idxs += olengths_idxs[i]; 6973 } 6974 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6975 ptr_idxs = recv_buffer_idxs; 6976 new_local_rows = 0; 6977 for (i=0;i<n_recvs;i++) { 6978 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6979 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6980 ptr_idxs += olengths_idxs[i]; 6981 } 6982 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 6983 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 6984 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 6985 6986 /* infer new local matrix type from received local matrices type */ 6987 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 6988 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 6989 if (n_recvs) { 6990 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 6991 ptr_idxs = recv_buffer_idxs; 6992 for (i=0;i<n_recvs;i++) { 6993 if ((PetscInt)new_local_type_private != *ptr_idxs) { 6994 new_local_type_private = MATAIJ_PRIVATE; 6995 break; 6996 } 6997 ptr_idxs += olengths_idxs[i]; 6998 } 6999 switch (new_local_type_private) { 7000 case MATDENSE_PRIVATE: 7001 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 7002 new_local_type = MATSEQAIJ; 7003 bs = 1; 7004 } else { /* if I receive only 1 dense matrix */ 7005 new_local_type = MATSEQDENSE; 7006 bs = 1; 7007 } 7008 break; 7009 case MATAIJ_PRIVATE: 7010 new_local_type = MATSEQAIJ; 7011 bs = 1; 7012 break; 7013 case MATBAIJ_PRIVATE: 7014 new_local_type = MATSEQBAIJ; 7015 break; 7016 case MATSBAIJ_PRIVATE: 7017 new_local_type = MATSEQSBAIJ; 7018 break; 7019 default: 7020 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 7021 break; 7022 } 7023 } else { /* by default, new_local_type is seqdense */ 7024 new_local_type = MATSEQDENSE; 7025 bs = 1; 7026 } 7027 7028 /* create MATIS object if needed */ 7029 if (!reuse) { 7030 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7031 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7032 } else { 7033 /* it also destroys the local matrices */ 7034 if (*mat_n) { 7035 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7036 } else { /* this is a fake object */ 7037 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7038 } 7039 } 7040 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7041 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7042 7043 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7044 7045 /* Global to local map of received indices */ 7046 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7047 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7048 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7049 7050 /* restore attributes -> type of incoming data and its size */ 7051 buf_size_idxs = 0; 7052 for (i=0;i<n_recvs;i++) { 7053 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7054 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7055 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7056 } 7057 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7058 7059 /* set preallocation */ 7060 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7061 if (!newisdense) { 7062 PetscInt *new_local_nnz=0; 7063 7064 ptr_idxs = recv_buffer_idxs_local; 7065 if (n_recvs) { 7066 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7067 } 7068 for (i=0;i<n_recvs;i++) { 7069 PetscInt j; 7070 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7071 for (j=0;j<*(ptr_idxs+1);j++) { 7072 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7073 } 7074 } else { 7075 /* TODO */ 7076 } 7077 ptr_idxs += olengths_idxs[i]; 7078 } 7079 if (new_local_nnz) { 7080 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7081 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7082 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7083 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7084 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7085 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7086 } else { 7087 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7088 } 7089 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7090 } else { 7091 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7092 } 7093 7094 /* set values */ 7095 ptr_vals = recv_buffer_vals; 7096 ptr_idxs = recv_buffer_idxs_local; 7097 for (i=0;i<n_recvs;i++) { 7098 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7099 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7100 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7101 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7102 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7103 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7104 } else { 7105 /* TODO */ 7106 } 7107 ptr_idxs += olengths_idxs[i]; 7108 ptr_vals += olengths_vals[i]; 7109 } 7110 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7111 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7112 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7113 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7114 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7115 7116 #if 0 7117 if (!restrict_comm) { /* check */ 7118 Vec lvec,rvec; 7119 PetscReal infty_error; 7120 7121 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7122 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7123 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7124 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7125 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7126 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7127 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7128 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7129 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7130 } 7131 #endif 7132 7133 /* assemble new additional is (if any) */ 7134 if (nis) { 7135 PetscInt **temp_idxs,*count_is,j,psum; 7136 7137 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7138 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7139 ptr_idxs = recv_buffer_idxs_is; 7140 psum = 0; 7141 for (i=0;i<n_recvs;i++) { 7142 for (j=0;j<nis;j++) { 7143 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7144 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7145 psum += plen; 7146 ptr_idxs += plen+1; /* shift pointer to received data */ 7147 } 7148 } 7149 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7150 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7151 for (i=1;i<nis;i++) { 7152 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7153 } 7154 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7155 ptr_idxs = recv_buffer_idxs_is; 7156 for (i=0;i<n_recvs;i++) { 7157 for (j=0;j<nis;j++) { 7158 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7159 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7160 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7161 ptr_idxs += plen+1; /* shift pointer to received data */ 7162 } 7163 } 7164 for (i=0;i<nis;i++) { 7165 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7166 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7167 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7168 } 7169 ierr = PetscFree(count_is);CHKERRQ(ierr); 7170 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7171 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7172 } 7173 /* free workspace */ 7174 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7175 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7176 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7177 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7178 if (isdense) { 7179 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7180 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7181 } else { 7182 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7183 } 7184 if (nis) { 7185 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7186 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7187 } 7188 7189 if (nvecs) { 7190 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7191 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7192 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7193 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7194 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7195 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7196 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7197 /* set values */ 7198 ptr_vals = recv_buffer_vecs; 7199 ptr_idxs = recv_buffer_idxs_local; 7200 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7201 for (i=0;i<n_recvs;i++) { 7202 PetscInt j; 7203 for (j=0;j<*(ptr_idxs+1);j++) { 7204 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7205 } 7206 ptr_idxs += olengths_idxs[i]; 7207 ptr_vals += olengths_idxs[i]-2; 7208 } 7209 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7210 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7211 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7212 } 7213 7214 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7215 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7216 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7217 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7218 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7219 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7220 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7221 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7222 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7223 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7224 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7225 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7226 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7227 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7228 ierr = PetscFree(onodes);CHKERRQ(ierr); 7229 if (nis) { 7230 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7231 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7232 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7233 } 7234 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7235 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7236 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7237 for (i=0;i<nis;i++) { 7238 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7239 } 7240 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7241 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7242 } 7243 *mat_n = NULL; 7244 } 7245 PetscFunctionReturn(0); 7246 } 7247 7248 /* temporary hack into ksp private data structure */ 7249 #include <petsc/private/kspimpl.h> 7250 7251 #undef __FUNCT__ 7252 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 7253 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7254 { 7255 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7256 PC_IS *pcis = (PC_IS*)pc->data; 7257 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7258 Mat coarsedivudotp = NULL; 7259 Mat coarseG,t_coarse_mat_is; 7260 MatNullSpace CoarseNullSpace = NULL; 7261 ISLocalToGlobalMapping coarse_islg; 7262 IS coarse_is,*isarray; 7263 PetscInt i,im_active=-1,active_procs=-1; 7264 PetscInt nis,nisdofs,nisneu,nisvert; 7265 PC pc_temp; 7266 PCType coarse_pc_type; 7267 KSPType coarse_ksp_type; 7268 PetscBool multilevel_requested,multilevel_allowed; 7269 PetscBool isredundant,isbddc,isnn,coarse_reuse; 7270 PetscInt ncoarse,nedcfield; 7271 PetscBool compute_vecs = PETSC_FALSE; 7272 PetscScalar *array; 7273 MatReuse coarse_mat_reuse; 7274 PetscBool restr, full_restr, have_void; 7275 PetscErrorCode ierr; 7276 7277 PetscFunctionBegin; 7278 /* Assign global numbering to coarse dofs */ 7279 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7280 PetscInt ocoarse_size; 7281 compute_vecs = PETSC_TRUE; 7282 ocoarse_size = pcbddc->coarse_size; 7283 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7284 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7285 /* see if we can avoid some work */ 7286 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7287 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7288 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7289 PC pc; 7290 PetscBool isbddc; 7291 7292 /* temporary workaround since PCBDDC does not have a reset method so far */ 7293 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 7294 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 7295 if (isbddc) { 7296 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 7297 } else { 7298 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7299 } 7300 coarse_reuse = PETSC_FALSE; 7301 } else { /* we can safely reuse already computed coarse matrix */ 7302 coarse_reuse = PETSC_TRUE; 7303 } 7304 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7305 coarse_reuse = PETSC_FALSE; 7306 } 7307 /* reset any subassembling information */ 7308 if (!coarse_reuse || pcbddc->recompute_topography) { 7309 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7310 } 7311 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7312 coarse_reuse = PETSC_TRUE; 7313 } 7314 /* assemble coarse matrix */ 7315 if (coarse_reuse && pcbddc->coarse_ksp) { 7316 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7317 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7318 coarse_mat_reuse = MAT_REUSE_MATRIX; 7319 } else { 7320 coarse_mat = NULL; 7321 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7322 } 7323 7324 /* creates temporary l2gmap and IS for coarse indexes */ 7325 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7326 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7327 7328 /* creates temporary MATIS object for coarse matrix */ 7329 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7330 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7331 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7332 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7333 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7334 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7335 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7336 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7337 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7338 7339 /* count "active" (i.e. with positive local size) and "void" processes */ 7340 im_active = !!(pcis->n); 7341 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7342 7343 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7344 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7345 /* full_restr : just use the receivers from the subassembling pattern */ 7346 coarse_mat_is = NULL; 7347 multilevel_allowed = PETSC_FALSE; 7348 multilevel_requested = PETSC_FALSE; 7349 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7350 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7351 if (multilevel_requested) { 7352 ncoarse = active_procs/pcbddc->coarsening_ratio; 7353 restr = PETSC_FALSE; 7354 full_restr = PETSC_FALSE; 7355 } else { 7356 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7357 restr = PETSC_TRUE; 7358 full_restr = PETSC_TRUE; 7359 } 7360 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7361 ncoarse = PetscMax(1,ncoarse); 7362 if (!pcbddc->coarse_subassembling) { 7363 if (pcbddc->coarsening_ratio > 1) { 7364 if (multilevel_requested) { 7365 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7366 } else { 7367 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7368 } 7369 } else { 7370 PetscMPIInt size,rank; 7371 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 7372 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7373 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 7374 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7375 } 7376 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7377 PetscInt psum; 7378 PetscMPIInt size; 7379 if (pcbddc->coarse_ksp) psum = 1; 7380 else psum = 0; 7381 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7382 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 7383 if (ncoarse < size) have_void = PETSC_TRUE; 7384 } 7385 /* determine if we can go multilevel */ 7386 if (multilevel_requested) { 7387 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7388 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7389 } 7390 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7391 7392 /* dump subassembling pattern */ 7393 if (pcbddc->dbg_flag && multilevel_allowed) { 7394 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7395 } 7396 7397 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7398 nedcfield = -1; 7399 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7400 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7401 const PetscInt *idxs; 7402 ISLocalToGlobalMapping tmap; 7403 7404 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7405 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7406 /* allocate space for temporary storage */ 7407 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7408 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7409 /* allocate for IS array */ 7410 nisdofs = pcbddc->n_ISForDofsLocal; 7411 if (pcbddc->nedclocal) { 7412 if (pcbddc->nedfield > -1) { 7413 nedcfield = pcbddc->nedfield; 7414 } else { 7415 nedcfield = 0; 7416 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7417 nisdofs = 1; 7418 } 7419 } 7420 nisneu = !!pcbddc->NeumannBoundariesLocal; 7421 nisvert = 0; /* nisvert is not used */ 7422 nis = nisdofs + nisneu + nisvert; 7423 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7424 /* dofs splitting */ 7425 for (i=0;i<nisdofs;i++) { 7426 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7427 if (nedcfield != i) { 7428 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7429 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7430 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7431 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7432 } else { 7433 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7434 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7435 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7436 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7437 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7438 } 7439 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7440 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7441 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7442 } 7443 /* neumann boundaries */ 7444 if (pcbddc->NeumannBoundariesLocal) { 7445 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7446 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7447 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7448 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7449 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7450 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7451 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7452 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7453 } 7454 /* free memory */ 7455 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7456 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7457 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7458 } else { 7459 nis = 0; 7460 nisdofs = 0; 7461 nisneu = 0; 7462 nisvert = 0; 7463 isarray = NULL; 7464 } 7465 /* destroy no longer needed map */ 7466 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7467 7468 /* subassemble */ 7469 if (multilevel_allowed) { 7470 Vec vp[1]; 7471 PetscInt nvecs = 0; 7472 PetscBool reuse,reuser; 7473 7474 if (coarse_mat) reuse = PETSC_TRUE; 7475 else reuse = PETSC_FALSE; 7476 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7477 vp[0] = NULL; 7478 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7479 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7480 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7481 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7482 nvecs = 1; 7483 7484 if (pcbddc->divudotp) { 7485 Mat B,loc_divudotp; 7486 Vec v,p; 7487 IS dummy; 7488 PetscInt np; 7489 7490 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7491 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7492 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7493 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7494 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7495 ierr = VecSet(p,1.);CHKERRQ(ierr); 7496 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7497 ierr = VecDestroy(&p);CHKERRQ(ierr); 7498 ierr = MatDestroy(&B);CHKERRQ(ierr); 7499 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7500 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7501 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7502 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7503 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7504 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7505 ierr = VecDestroy(&v);CHKERRQ(ierr); 7506 } 7507 } 7508 if (reuser) { 7509 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7510 } else { 7511 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7512 } 7513 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7514 PetscScalar *arraym,*arrayv; 7515 PetscInt nl; 7516 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7517 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7518 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7519 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7520 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7521 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7522 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7523 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7524 } else { 7525 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7526 } 7527 } else { 7528 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7529 } 7530 if (coarse_mat_is || coarse_mat) { 7531 PetscMPIInt size; 7532 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7533 if (!multilevel_allowed) { 7534 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7535 } else { 7536 Mat A; 7537 7538 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7539 if (coarse_mat_is) { 7540 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7541 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7542 coarse_mat = coarse_mat_is; 7543 } 7544 /* be sure we don't have MatSeqDENSE as local mat */ 7545 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7546 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7547 } 7548 } 7549 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7550 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7551 7552 /* create local to global scatters for coarse problem */ 7553 if (compute_vecs) { 7554 PetscInt lrows; 7555 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7556 if (coarse_mat) { 7557 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7558 } else { 7559 lrows = 0; 7560 } 7561 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7562 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7563 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7564 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7565 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7566 } 7567 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7568 7569 /* set defaults for coarse KSP and PC */ 7570 if (multilevel_allowed) { 7571 coarse_ksp_type = KSPRICHARDSON; 7572 coarse_pc_type = PCBDDC; 7573 } else { 7574 coarse_ksp_type = KSPPREONLY; 7575 coarse_pc_type = PCREDUNDANT; 7576 } 7577 7578 /* print some info if requested */ 7579 if (pcbddc->dbg_flag) { 7580 if (!multilevel_allowed) { 7581 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7582 if (multilevel_requested) { 7583 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7584 } else if (pcbddc->max_levels) { 7585 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7586 } 7587 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7588 } 7589 } 7590 7591 /* communicate coarse discrete gradient */ 7592 coarseG = NULL; 7593 if (pcbddc->nedcG && multilevel_allowed) { 7594 MPI_Comm ccomm; 7595 if (coarse_mat) { 7596 ccomm = PetscObjectComm((PetscObject)coarse_mat); 7597 } else { 7598 ccomm = MPI_COMM_NULL; 7599 } 7600 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 7601 } 7602 7603 /* create the coarse KSP object only once with defaults */ 7604 if (coarse_mat) { 7605 PetscViewer dbg_viewer = NULL; 7606 if (pcbddc->dbg_flag) { 7607 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7608 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7609 } 7610 if (!pcbddc->coarse_ksp) { 7611 char prefix[256],str_level[16]; 7612 size_t len; 7613 7614 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7615 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7616 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7617 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7618 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7619 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7620 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7621 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7622 /* TODO is this logic correct? should check for coarse_mat type */ 7623 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7624 /* prefix */ 7625 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7626 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7627 if (!pcbddc->current_level) { 7628 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7629 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7630 } else { 7631 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7632 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7633 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7634 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7635 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7636 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7637 } 7638 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7639 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7640 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7641 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7642 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7643 /* allow user customization */ 7644 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7645 } 7646 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7647 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7648 if (nisdofs) { 7649 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7650 for (i=0;i<nisdofs;i++) { 7651 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7652 } 7653 } 7654 if (nisneu) { 7655 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7656 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7657 } 7658 if (nisvert) { 7659 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7660 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7661 } 7662 if (coarseG) { 7663 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 7664 } 7665 7666 /* get some info after set from options */ 7667 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7668 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7669 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7670 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 7671 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7672 isbddc = PETSC_FALSE; 7673 } 7674 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7675 if (isredundant) { 7676 KSP inner_ksp; 7677 PC inner_pc; 7678 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7679 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7680 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7681 } 7682 7683 /* parameters which miss an API */ 7684 if (isbddc) { 7685 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7686 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7687 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7688 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7689 if (pcbddc_coarse->benign_saddle_point) { 7690 Mat coarsedivudotp_is; 7691 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7692 IS row,col; 7693 const PetscInt *gidxs; 7694 PetscInt n,st,M,N; 7695 7696 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7697 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7698 st = st-n; 7699 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7700 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7701 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7702 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7703 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7704 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7705 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7706 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7707 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7708 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7709 ierr = ISDestroy(&row);CHKERRQ(ierr); 7710 ierr = ISDestroy(&col);CHKERRQ(ierr); 7711 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7712 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7713 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7714 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7715 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7716 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7717 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7718 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7719 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7720 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7721 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7722 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7723 } 7724 } 7725 7726 /* propagate symmetry info of coarse matrix */ 7727 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7728 if (pc->pmat->symmetric_set) { 7729 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7730 } 7731 if (pc->pmat->hermitian_set) { 7732 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7733 } 7734 if (pc->pmat->spd_set) { 7735 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7736 } 7737 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7738 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7739 } 7740 /* set operators */ 7741 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7742 if (pcbddc->dbg_flag) { 7743 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7744 } 7745 } 7746 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 7747 ierr = PetscFree(isarray);CHKERRQ(ierr); 7748 #if 0 7749 { 7750 PetscViewer viewer; 7751 char filename[256]; 7752 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7753 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7754 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7755 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7756 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7757 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7758 } 7759 #endif 7760 7761 if (pcbddc->coarse_ksp) { 7762 Vec crhs,csol; 7763 7764 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7765 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7766 if (!csol) { 7767 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7768 } 7769 if (!crhs) { 7770 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7771 } 7772 } 7773 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7774 7775 /* compute null space for coarse solver if the benign trick has been requested */ 7776 if (pcbddc->benign_null) { 7777 7778 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7779 for (i=0;i<pcbddc->benign_n;i++) { 7780 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7781 } 7782 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7783 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7784 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7785 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7786 if (coarse_mat) { 7787 Vec nullv; 7788 PetscScalar *array,*array2; 7789 PetscInt nl; 7790 7791 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7792 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7793 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7794 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7795 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7796 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7797 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7798 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7799 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7800 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7801 } 7802 } 7803 7804 if (pcbddc->coarse_ksp) { 7805 PetscBool ispreonly; 7806 7807 if (CoarseNullSpace) { 7808 PetscBool isnull; 7809 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7810 if (isnull) { 7811 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7812 } 7813 /* TODO: add local nullspaces (if any) */ 7814 } 7815 /* setup coarse ksp */ 7816 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7817 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7818 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7819 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7820 KSP check_ksp; 7821 KSPType check_ksp_type; 7822 PC check_pc; 7823 Vec check_vec,coarse_vec; 7824 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7825 PetscInt its; 7826 PetscBool compute_eigs; 7827 PetscReal *eigs_r,*eigs_c; 7828 PetscInt neigs; 7829 const char *prefix; 7830 7831 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7832 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7833 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7834 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7835 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7836 /* prevent from setup unneeded object */ 7837 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7838 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7839 if (ispreonly) { 7840 check_ksp_type = KSPPREONLY; 7841 compute_eigs = PETSC_FALSE; 7842 } else { 7843 check_ksp_type = KSPGMRES; 7844 compute_eigs = PETSC_TRUE; 7845 } 7846 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7847 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7848 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7849 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7850 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7851 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7852 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7853 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7854 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7855 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7856 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7857 /* create random vec */ 7858 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7859 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7860 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7861 /* solve coarse problem */ 7862 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7863 /* set eigenvalue estimation if preonly has not been requested */ 7864 if (compute_eigs) { 7865 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7866 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7867 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7868 if (neigs) { 7869 lambda_max = eigs_r[neigs-1]; 7870 lambda_min = eigs_r[0]; 7871 if (pcbddc->use_coarse_estimates) { 7872 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7873 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7874 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7875 } 7876 } 7877 } 7878 } 7879 7880 /* check coarse problem residual error */ 7881 if (pcbddc->dbg_flag) { 7882 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7883 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7884 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7885 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7886 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7887 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7888 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7889 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7890 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7891 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7892 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7893 if (CoarseNullSpace) { 7894 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7895 } 7896 if (compute_eigs) { 7897 PetscReal lambda_max_s,lambda_min_s; 7898 KSPConvergedReason reason; 7899 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7900 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7901 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 7902 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7903 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7904 for (i=0;i<neigs;i++) { 7905 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7906 } 7907 } 7908 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7909 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7910 } 7911 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7912 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7913 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7914 if (compute_eigs) { 7915 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7916 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7917 } 7918 } 7919 } 7920 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7921 /* print additional info */ 7922 if (pcbddc->dbg_flag) { 7923 /* waits until all processes reaches this point */ 7924 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7925 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7926 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7927 } 7928 7929 /* free memory */ 7930 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7931 PetscFunctionReturn(0); 7932 } 7933 7934 #undef __FUNCT__ 7935 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 7936 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7937 { 7938 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7939 PC_IS* pcis = (PC_IS*)pc->data; 7940 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7941 IS subset,subset_mult,subset_n; 7942 PetscInt local_size,coarse_size=0; 7943 PetscInt *local_primal_indices=NULL; 7944 const PetscInt *t_local_primal_indices; 7945 PetscErrorCode ierr; 7946 7947 PetscFunctionBegin; 7948 /* Compute global number of coarse dofs */ 7949 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7950 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7951 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7952 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7953 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7954 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7955 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7956 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7957 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7958 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7959 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7960 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7961 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7962 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7963 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7964 7965 /* check numbering */ 7966 if (pcbddc->dbg_flag) { 7967 PetscScalar coarsesum,*array,*array2; 7968 PetscInt i; 7969 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7970 7971 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7972 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7973 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7974 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7975 /* counter */ 7976 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7977 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7978 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7979 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7980 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7981 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7982 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7983 for (i=0;i<pcbddc->local_primal_size;i++) { 7984 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7985 } 7986 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7987 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7988 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7989 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7990 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7991 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7992 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7993 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7994 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7995 for (i=0;i<pcis->n;i++) { 7996 if (array[i] != 0.0 && array[i] != array2[i]) { 7997 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 7998 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 7999 set_error = PETSC_TRUE; 8000 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8001 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8002 } 8003 } 8004 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8005 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8006 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8007 for (i=0;i<pcis->n;i++) { 8008 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8009 } 8010 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8011 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8012 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8013 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8014 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8015 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8016 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8017 PetscInt *gidxs; 8018 8019 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8020 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8021 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8022 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8023 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8024 for (i=0;i<pcbddc->local_primal_size;i++) { 8025 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8026 } 8027 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8028 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8029 } 8030 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8031 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8032 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8033 } 8034 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 8035 /* get back data */ 8036 *coarse_size_n = coarse_size; 8037 *local_primal_indices_n = local_primal_indices; 8038 PetscFunctionReturn(0); 8039 } 8040 8041 #undef __FUNCT__ 8042 #define __FUNCT__ "PCBDDCGlobalToLocal" 8043 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8044 { 8045 IS localis_t; 8046 PetscInt i,lsize,*idxs,n; 8047 PetscScalar *vals; 8048 PetscErrorCode ierr; 8049 8050 PetscFunctionBegin; 8051 /* get indices in local ordering exploiting local to global map */ 8052 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8053 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8054 for (i=0;i<lsize;i++) vals[i] = 1.0; 8055 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8056 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8057 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8058 if (idxs) { /* multilevel guard */ 8059 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8060 } 8061 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8062 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8063 ierr = PetscFree(vals);CHKERRQ(ierr); 8064 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8065 /* now compute set in local ordering */ 8066 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8067 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8068 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8069 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8070 for (i=0,lsize=0;i<n;i++) { 8071 if (PetscRealPart(vals[i]) > 0.5) { 8072 lsize++; 8073 } 8074 } 8075 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8076 for (i=0,lsize=0;i<n;i++) { 8077 if (PetscRealPart(vals[i]) > 0.5) { 8078 idxs[lsize++] = i; 8079 } 8080 } 8081 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8082 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8083 *localis = localis_t; 8084 PetscFunctionReturn(0); 8085 } 8086 8087 #undef __FUNCT__ 8088 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 8089 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8090 { 8091 PC_IS *pcis=(PC_IS*)pc->data; 8092 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8093 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8094 Mat S_j; 8095 PetscInt *used_xadj,*used_adjncy; 8096 PetscBool free_used_adj; 8097 PetscErrorCode ierr; 8098 8099 PetscFunctionBegin; 8100 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8101 free_used_adj = PETSC_FALSE; 8102 if (pcbddc->sub_schurs_layers == -1) { 8103 used_xadj = NULL; 8104 used_adjncy = NULL; 8105 } else { 8106 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8107 used_xadj = pcbddc->mat_graph->xadj; 8108 used_adjncy = pcbddc->mat_graph->adjncy; 8109 } else if (pcbddc->computed_rowadj) { 8110 used_xadj = pcbddc->mat_graph->xadj; 8111 used_adjncy = pcbddc->mat_graph->adjncy; 8112 } else { 8113 PetscBool flg_row=PETSC_FALSE; 8114 const PetscInt *xadj,*adjncy; 8115 PetscInt nvtxs; 8116 8117 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8118 if (flg_row) { 8119 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8120 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8121 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8122 free_used_adj = PETSC_TRUE; 8123 } else { 8124 pcbddc->sub_schurs_layers = -1; 8125 used_xadj = NULL; 8126 used_adjncy = NULL; 8127 } 8128 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8129 } 8130 } 8131 8132 /* setup sub_schurs data */ 8133 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8134 if (!sub_schurs->schur_explicit) { 8135 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8136 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8137 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8138 } else { 8139 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8140 PetscBool isseqaij,need_change = PETSC_FALSE; 8141 PetscInt benign_n; 8142 Mat change = NULL; 8143 Vec scaling = NULL; 8144 IS change_primal = NULL; 8145 8146 if (!pcbddc->use_vertices && reuse_solvers) { 8147 PetscInt n_vertices; 8148 8149 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8150 reuse_solvers = (PetscBool)!n_vertices; 8151 } 8152 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8153 if (!isseqaij) { 8154 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8155 if (matis->A == pcbddc->local_mat) { 8156 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8157 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8158 } else { 8159 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8160 } 8161 } 8162 if (!pcbddc->benign_change_explicit) { 8163 benign_n = pcbddc->benign_n; 8164 } else { 8165 benign_n = 0; 8166 } 8167 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8168 We need a global reduction to avoid possible deadlocks. 8169 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8170 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8171 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8172 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8173 need_change = (PetscBool)(!need_change); 8174 } 8175 /* If the user defines additional constraints, we import them here. 8176 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8177 if (need_change) { 8178 PC_IS *pcisf; 8179 PC_BDDC *pcbddcf; 8180 PC pcf; 8181 8182 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8183 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8184 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8185 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8186 /* hacks */ 8187 pcisf = (PC_IS*)pcf->data; 8188 pcisf->is_B_local = pcis->is_B_local; 8189 pcisf->vec1_N = pcis->vec1_N; 8190 pcisf->BtoNmap = pcis->BtoNmap; 8191 pcisf->n = pcis->n; 8192 pcisf->n_B = pcis->n_B; 8193 pcbddcf = (PC_BDDC*)pcf->data; 8194 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8195 pcbddcf->mat_graph = pcbddc->mat_graph; 8196 pcbddcf->use_faces = PETSC_TRUE; 8197 pcbddcf->use_change_of_basis = PETSC_TRUE; 8198 pcbddcf->use_change_on_faces = PETSC_TRUE; 8199 pcbddcf->use_qr_single = PETSC_TRUE; 8200 pcbddcf->fake_change = PETSC_TRUE; 8201 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8202 /* store information on primal vertices and change of basis (in local numbering) */ 8203 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8204 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8205 change = pcbddcf->ConstraintMatrix; 8206 pcbddcf->ConstraintMatrix = NULL; 8207 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8208 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8209 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8210 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8211 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8212 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8213 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8214 pcf->ops->destroy = NULL; 8215 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8216 } 8217 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8218 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8219 ierr = MatDestroy(&change);CHKERRQ(ierr); 8220 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8221 } 8222 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8223 8224 /* free adjacency */ 8225 if (free_used_adj) { 8226 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8227 } 8228 PetscFunctionReturn(0); 8229 } 8230 8231 #undef __FUNCT__ 8232 #define __FUNCT__ "PCBDDCInitSubSchurs" 8233 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8234 { 8235 PC_IS *pcis=(PC_IS*)pc->data; 8236 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8237 PCBDDCGraph graph; 8238 PetscErrorCode ierr; 8239 8240 PetscFunctionBegin; 8241 /* attach interface graph for determining subsets */ 8242 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8243 IS verticesIS,verticescomm; 8244 PetscInt vsize,*idxs; 8245 8246 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8247 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8248 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8249 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8250 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8251 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8252 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8253 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8254 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8255 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8256 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8257 } else { 8258 graph = pcbddc->mat_graph; 8259 } 8260 /* print some info */ 8261 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8262 IS vertices; 8263 PetscInt nv,nedges,nfaces; 8264 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8265 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8266 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8267 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8268 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8269 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8270 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8271 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8272 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8273 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8274 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8275 } 8276 8277 /* sub_schurs init */ 8278 if (!pcbddc->sub_schurs) { 8279 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8280 } 8281 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8282 8283 /* free graph struct */ 8284 if (pcbddc->sub_schurs_rebuild) { 8285 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8286 } 8287 PetscFunctionReturn(0); 8288 } 8289 8290 #undef __FUNCT__ 8291 #define __FUNCT__ "PCBDDCCheckOperator" 8292 PetscErrorCode PCBDDCCheckOperator(PC pc) 8293 { 8294 PC_IS *pcis=(PC_IS*)pc->data; 8295 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8296 PetscErrorCode ierr; 8297 8298 PetscFunctionBegin; 8299 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8300 IS zerodiag = NULL; 8301 Mat S_j,B0_B=NULL; 8302 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8303 PetscScalar *p0_check,*array,*array2; 8304 PetscReal norm; 8305 PetscInt i; 8306 8307 /* B0 and B0_B */ 8308 if (zerodiag) { 8309 IS dummy; 8310 8311 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8312 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8313 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8314 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8315 } 8316 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8317 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8318 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8319 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8320 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8321 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8322 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8323 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8324 /* S_j */ 8325 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8326 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8327 8328 /* mimic vector in \widetilde{W}_\Gamma */ 8329 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8330 /* continuous in primal space */ 8331 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8332 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8333 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8334 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8335 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8336 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8337 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8338 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8339 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8340 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8341 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8342 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8343 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8344 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8345 8346 /* assemble rhs for coarse problem */ 8347 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8348 /* local with Schur */ 8349 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8350 if (zerodiag) { 8351 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8352 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8353 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8354 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8355 } 8356 /* sum on primal nodes the local contributions */ 8357 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8358 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8359 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8360 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8361 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8362 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8363 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8364 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8365 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8366 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8367 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8368 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8369 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8370 /* scale primal nodes (BDDC sums contibutions) */ 8371 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8372 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8373 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8374 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8375 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8376 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8377 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8378 /* global: \widetilde{B0}_B w_\Gamma */ 8379 if (zerodiag) { 8380 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8381 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8382 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8383 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8384 } 8385 /* BDDC */ 8386 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8387 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8388 8389 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8390 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8391 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8392 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8393 for (i=0;i<pcbddc->benign_n;i++) { 8394 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8395 } 8396 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8397 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8398 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8399 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8400 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8401 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8402 } 8403 PetscFunctionReturn(0); 8404 } 8405 8406 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8407 #undef __FUNCT__ 8408 #define __FUNCT__ "MatMPIAIJRestrict" 8409 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8410 { 8411 Mat At; 8412 IS rows; 8413 PetscInt rst,ren; 8414 PetscErrorCode ierr; 8415 PetscLayout rmap; 8416 8417 PetscFunctionBegin; 8418 rst = ren = 0; 8419 if (ccomm != MPI_COMM_NULL) { 8420 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8421 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8422 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8423 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8424 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8425 } 8426 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8427 ierr = MatGetSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8428 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8429 8430 if (ccomm != MPI_COMM_NULL) { 8431 Mat_MPIAIJ *a,*b; 8432 IS from,to; 8433 Vec gvec; 8434 PetscInt lsize; 8435 8436 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8437 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8438 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8439 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8440 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8441 a = (Mat_MPIAIJ*)At->data; 8442 b = (Mat_MPIAIJ*)(*B)->data; 8443 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8444 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8445 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8446 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8447 b->A = a->A; 8448 b->B = a->B; 8449 8450 b->donotstash = a->donotstash; 8451 b->roworiented = a->roworiented; 8452 b->rowindices = 0; 8453 b->rowvalues = 0; 8454 b->getrowactive = PETSC_FALSE; 8455 8456 (*B)->rmap = rmap; 8457 (*B)->factortype = A->factortype; 8458 (*B)->assembled = PETSC_TRUE; 8459 (*B)->insertmode = NOT_SET_VALUES; 8460 (*B)->preallocated = PETSC_TRUE; 8461 8462 if (a->colmap) { 8463 #if defined(PETSC_USE_CTABLE) 8464 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8465 #else 8466 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8467 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8468 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8469 #endif 8470 } else b->colmap = 0; 8471 if (a->garray) { 8472 PetscInt len; 8473 len = a->B->cmap->n; 8474 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8475 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8476 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8477 } else b->garray = 0; 8478 8479 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8480 b->lvec = a->lvec; 8481 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8482 8483 /* cannot use VecScatterCopy */ 8484 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8485 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8486 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8487 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8488 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8489 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8490 ierr = ISDestroy(&from);CHKERRQ(ierr); 8491 ierr = ISDestroy(&to);CHKERRQ(ierr); 8492 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8493 } 8494 ierr = MatDestroy(&At);CHKERRQ(ierr); 8495 PetscFunctionReturn(0); 8496 } 8497