1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 8 9 /* if range is true, it returns B s.t. span{B} = range(A) 10 if range is false, it returns B s.t. range(B) _|_ range(A) */ 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatDenseOrthogonalRangeOrComplement" 13 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 14 { 15 #if !defined(PETSC_USE_COMPLEX) 16 PetscScalar *uwork,*data,*U, ds = 0.; 17 PetscReal *sing; 18 PetscBLASInt bM,bN,lwork,lierr,di = 1; 19 PetscInt ulw,i,nr,nc,n; 20 PetscErrorCode ierr; 21 22 PetscFunctionBegin; 23 #if defined(PETSC_MISSING_LAPACK_GESVD) 24 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 25 #else 26 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 27 if (!nr || !nc) PetscFunctionReturn(0); 28 29 /* workspace */ 30 if (!work) { 31 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 32 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 33 } else { 34 ulw = lw; 35 uwork = work; 36 } 37 n = PetscMin(nr,nc); 38 if (!rwork) { 39 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 40 } else { 41 sing = rwork; 42 } 43 44 /* SVD */ 45 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 46 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 47 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 49 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 50 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 51 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 52 ierr = PetscFPTrapPop();CHKERRQ(ierr); 53 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 54 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 55 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 56 if (!rwork) { 57 ierr = PetscFree(sing);CHKERRQ(ierr); 58 } 59 if (!work) { 60 ierr = PetscFree(uwork);CHKERRQ(ierr); 61 } 62 /* create B */ 63 if (!range) { 64 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 65 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 66 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 67 } else { 68 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 69 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 70 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 71 } 72 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 73 ierr = PetscFree(U);CHKERRQ(ierr); 74 #endif 75 #else /* PETSC_USE_COMPLEX */ 76 PetscFunctionBegin; 77 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 78 #endif 79 PetscFunctionReturn(0); 80 } 81 82 /* TODO REMOVE */ 83 #if defined(PRINT_GDET) 84 static int inc = 0; 85 static int lev = 0; 86 #endif 87 88 #undef __FUNCT__ 89 #define __FUNCT__ "PCBDDCComputeNedelecChangeEdge" 90 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 91 { 92 PetscErrorCode ierr; 93 Mat GE,GEd; 94 PetscInt rsize,csize,esize; 95 PetscScalar *ptr; 96 97 PetscFunctionBegin; 98 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 99 if (!esize) PetscFunctionReturn(0); 100 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 101 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 102 103 /* gradients */ 104 ptr = work + 5*esize; 105 ierr = MatGetSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 107 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 108 ierr = MatDestroy(&GE);CHKERRQ(ierr); 109 110 /* constants */ 111 ptr += rsize*csize; 112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 113 ierr = MatGetSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 114 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 115 ierr = MatDestroy(&GE);CHKERRQ(ierr); 116 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 117 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 118 119 if (corners) { 120 Mat GEc; 121 PetscScalar *vals,v; 122 123 ierr = MatGetSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 124 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 125 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 126 /* v = PetscAbsScalar(vals[0]) */; 127 v = 1.; 128 cvals[0] = vals[0]/v; 129 cvals[1] = vals[1]/v; 130 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 131 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 132 #if defined(PRINT_GDET) 133 { 134 PetscViewer viewer; 135 char filename[256]; 136 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 137 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 138 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 139 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 140 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 141 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 142 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 143 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 144 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 145 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 146 } 147 #endif 148 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 149 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 150 } 151 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "PCBDDCNedelecSupport" 157 PetscErrorCode PCBDDCNedelecSupport(PC pc) 158 { 159 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 160 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 161 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 162 Vec tvec; 163 PetscSF sfv; 164 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 165 MPI_Comm comm; 166 IS lned,primals,allprimals,nedfieldlocal; 167 IS *eedges,*extrows,*extcols,*alleedges; 168 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 169 PetscScalar *vals,*work; 170 PetscReal *rwork; 171 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 172 PetscInt ne,nv,Lv,order,n,field; 173 PetscInt n_neigh,*neigh,*n_shared,**shared; 174 PetscInt i,j,extmem,cum,maxsize,nee; 175 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 176 PetscInt *sfvleaves,*sfvroots; 177 PetscInt *corners,*cedges; 178 PetscInt *ecount,**eneighs,*vcount,**vneighs; 179 #if defined(PETSC_USE_DEBUG) 180 PetscInt *emarks; 181 #endif 182 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 183 PetscErrorCode ierr; 184 185 PetscFunctionBegin; 186 /* If the discrete gradient is defined for a subset of dofs and global is true, 187 it assumes G is given in global ordering for all the dofs. 188 Otherwise, the ordering is global for the Nedelec field */ 189 order = pcbddc->nedorder; 190 conforming = pcbddc->conforming; 191 field = pcbddc->nedfield; 192 global = pcbddc->nedglobal; 193 setprimal = PETSC_FALSE; 194 print = PETSC_FALSE; 195 singular = PETSC_FALSE; 196 197 /* Command line customization */ 198 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 199 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 200 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 201 ierr = PetscOptionsInt ("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 202 /* print debug info TODO: to be removed */ 203 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 204 ierr = PetscOptionsEnd();CHKERRQ(ierr); 205 206 /* Return if there are no edges in the decomposition and the problem is not singular */ 207 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 208 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 209 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 210 if (!singular) { 211 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 212 lrc[0] = PETSC_FALSE; 213 for (i=0;i<n;i++) { 214 if (PetscRealPart(vals[i]) > 2.) { 215 lrc[0] = PETSC_TRUE; 216 break; 217 } 218 } 219 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 220 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 221 if (!lrc[1]) PetscFunctionReturn(0); 222 } 223 224 /* Get Nedelec field */ 225 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 226 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 227 if (pcbddc->n_ISForDofsLocal && field >= 0) { 228 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 229 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 230 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 231 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 232 ne = n; 233 nedfieldlocal = NULL; 234 global = PETSC_TRUE; 235 } else if (field == PETSC_DECIDE) { 236 PetscInt rst,ren,*idx; 237 238 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 239 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 240 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 241 for (i=rst;i<ren;i++) { 242 PetscInt nc; 243 244 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 245 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 246 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 247 } 248 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 249 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 250 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 251 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 252 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 253 } else { 254 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 255 } 256 257 /* Sanity checks */ 258 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 259 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 260 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 261 262 /* Just set primal dofs and return */ 263 if (setprimal) { 264 IS enedfieldlocal; 265 PetscInt *eidxs; 266 267 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 268 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 269 if (nedfieldlocal) { 270 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 271 for (i=0,cum=0;i<ne;i++) { 272 if (PetscRealPart(vals[idxs[i]]) > 2.) { 273 eidxs[cum++] = idxs[i]; 274 } 275 } 276 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 277 } else { 278 for (i=0,cum=0;i<ne;i++) { 279 if (PetscRealPart(vals[i]) > 2.) { 280 eidxs[cum++] = i; 281 } 282 } 283 } 284 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 285 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 286 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 287 ierr = PetscFree(eidxs);CHKERRQ(ierr); 288 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 289 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 290 PetscFunctionReturn(0); 291 } 292 293 /* Compute some l2g maps */ 294 if (nedfieldlocal) { 295 IS is; 296 297 /* need to map from the local Nedelec field to local numbering */ 298 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 299 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 300 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 301 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 302 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 303 if (global) { 304 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 305 el2g = al2g; 306 } else { 307 IS gis; 308 309 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 310 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 311 ierr = ISDestroy(&gis);CHKERRQ(ierr); 312 } 313 ierr = ISDestroy(&is);CHKERRQ(ierr); 314 } else { 315 /* restore default */ 316 pcbddc->nedfield = -1; 317 /* one ref for the destruction of al2g, one for el2g */ 318 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 319 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 320 el2g = al2g; 321 fl2g = NULL; 322 } 323 324 /* Start communication to drop connections for interior edges (for cc analysis only) */ 325 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 326 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 327 if (nedfieldlocal) { 328 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 329 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 330 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 331 } else { 332 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 333 } 334 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 335 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 336 337 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 338 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 339 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 340 if (global) { 341 PetscInt rst; 342 343 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 344 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 345 if (matis->sf_rootdata[i] < 2) { 346 matis->sf_rootdata[cum++] = i + rst; 347 } 348 } 349 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 350 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 351 } else { 352 PetscInt *tbz; 353 354 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 355 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 356 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 357 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 358 for (i=0,cum=0;i<ne;i++) 359 if (matis->sf_leafdata[idxs[i]] == 1) 360 tbz[cum++] = i; 361 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 362 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 363 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 364 ierr = PetscFree(tbz);CHKERRQ(ierr); 365 } 366 } else { /* we need the entire G to infer the nullspace */ 367 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 368 G = pcbddc->discretegradient; 369 } 370 371 /* Extract subdomain relevant rows of G */ 372 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 373 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 374 ierr = MatGetSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 375 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 376 ierr = ISDestroy(&lned);CHKERRQ(ierr); 377 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 378 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 379 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 380 381 /* SF for nodal dofs communications */ 382 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 383 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 384 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 385 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 386 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 387 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 388 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 389 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 390 i = singular ? 2 : 1; 391 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 392 393 /* Destroy temporary G created in MATIS format and modified G */ 394 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 395 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 396 ierr = MatDestroy(&G);CHKERRQ(ierr); 397 398 if (print) { 399 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 400 ierr = MatView(lG,NULL);CHKERRQ(ierr); 401 } 402 403 /* Save lG for values insertion in change of basis */ 404 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 405 406 /* Analyze the edge-nodes connections (duplicate lG) */ 407 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 408 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 409 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 410 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 411 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 412 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 413 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 414 /* need to import the boundary specification to ensure the 415 proper detection of coarse edges' endpoints */ 416 if (pcbddc->DirichletBoundariesLocal) { 417 IS is; 418 419 if (fl2g) { 420 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 421 } else { 422 is = pcbddc->DirichletBoundariesLocal; 423 } 424 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 425 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 426 for (i=0;i<cum;i++) { 427 if (idxs[i] >= 0) { 428 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 429 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 430 } 431 } 432 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 433 if (fl2g) { 434 ierr = ISDestroy(&is);CHKERRQ(ierr); 435 } 436 } 437 if (pcbddc->NeumannBoundariesLocal) { 438 IS is; 439 440 if (fl2g) { 441 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 442 } else { 443 is = pcbddc->NeumannBoundariesLocal; 444 } 445 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 446 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 447 for (i=0;i<cum;i++) { 448 if (idxs[i] >= 0) { 449 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 450 } 451 } 452 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 453 if (fl2g) { 454 ierr = ISDestroy(&is);CHKERRQ(ierr); 455 } 456 } 457 458 /* Count neighs per dof */ 459 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 460 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 461 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 462 for (i=1,cum=0;i<n_neigh;i++) { 463 cum += n_shared[i]; 464 for (j=0;j<n_shared[i];j++) { 465 ecount[shared[i][j]]++; 466 } 467 } 468 if (ne) { 469 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 470 } 471 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 472 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 473 for (i=1;i<n_neigh;i++) { 474 for (j=0;j<n_shared[i];j++) { 475 PetscInt k = shared[i][j]; 476 eneighs[k][ecount[k]] = neigh[i]; 477 ecount[k]++; 478 } 479 } 480 for (i=0;i<ne;i++) { 481 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 482 } 483 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 484 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 485 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 486 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 487 for (i=1,cum=0;i<n_neigh;i++) { 488 cum += n_shared[i]; 489 for (j=0;j<n_shared[i];j++) { 490 vcount[shared[i][j]]++; 491 } 492 } 493 if (nv) { 494 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 495 } 496 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 497 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 498 for (i=1;i<n_neigh;i++) { 499 for (j=0;j<n_shared[i];j++) { 500 PetscInt k = shared[i][j]; 501 vneighs[k][vcount[k]] = neigh[i]; 502 vcount[k]++; 503 } 504 } 505 for (i=0;i<nv;i++) { 506 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 507 } 508 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 509 510 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 511 for proper detection of coarse edges' endpoints */ 512 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 513 for (i=0;i<ne;i++) { 514 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 515 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 516 } 517 } 518 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 519 if (!conforming) { 520 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 521 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 522 } 523 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 524 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 525 cum = 0; 526 for (i=0;i<ne;i++) { 527 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 528 if (!PetscBTLookup(btee,i)) { 529 marks[cum++] = i; 530 continue; 531 } 532 /* set badly connected edge dofs as primal */ 533 if (!conforming) { 534 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 535 marks[cum++] = i; 536 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 537 for (j=ii[i];j<ii[i+1];j++) { 538 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 539 } 540 } else { 541 /* every edge dofs should be connected trough a certain number of nodal dofs 542 to other edge dofs belonging to coarse edges 543 - at most 2 endpoints 544 - order-1 interior nodal dofs 545 - no undefined nodal dofs (nconn < order) 546 */ 547 PetscInt ends = 0,ints = 0, undef = 0; 548 for (j=ii[i];j<ii[i+1];j++) { 549 PetscInt v = jj[j],k; 550 PetscInt nconn = iit[v+1]-iit[v]; 551 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 552 if (nconn > order) ends++; 553 else if (nconn == order) ints++; 554 else undef++; 555 } 556 if (undef || ends > 2 || ints != order -1) { 557 marks[cum++] = i; 558 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 559 for (j=ii[i];j<ii[i+1];j++) { 560 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 561 } 562 } 563 } 564 } 565 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 566 if (!order && ii[i+1] != ii[i]) { 567 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 568 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 569 } 570 } 571 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 572 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 573 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 574 if (!conforming) { 575 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 576 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 577 } 578 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 579 580 /* identify splitpoints and corner candidates */ 581 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 582 if (print) { 583 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 584 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 585 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 586 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 587 } 588 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 589 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 590 for (i=0;i<nv;i++) { 591 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 592 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 593 if (!order) { /* variable order */ 594 PetscReal vorder = 0.; 595 596 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 597 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 598 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 599 ord = 1; 600 } 601 #if defined(PETSC_USE_DEBUG) 602 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 603 #endif 604 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 605 if (PetscBTLookup(btbd,jj[j])) { 606 bdir = PETSC_TRUE; 607 break; 608 } 609 if (vc != ecount[jj[j]]) { 610 sneighs = PETSC_FALSE; 611 } else { 612 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 613 for (k=0;k<vc;k++) { 614 if (vn[k] != en[k]) { 615 sneighs = PETSC_FALSE; 616 break; 617 } 618 } 619 } 620 } 621 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 622 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 623 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 624 } else if (test == ord) { 625 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 626 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 627 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 628 } else { 629 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 630 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 631 } 632 } 633 } 634 ierr = PetscFree(ecount);CHKERRQ(ierr); 635 ierr = PetscFree(vcount);CHKERRQ(ierr); 636 if (ne) { 637 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 638 } 639 if (nv) { 640 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 641 } 642 ierr = PetscFree(eneighs);CHKERRQ(ierr); 643 ierr = PetscFree(vneighs);CHKERRQ(ierr); 644 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 645 646 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 647 if (order != 1) { 648 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 649 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 650 for (i=0;i<nv;i++) { 651 if (PetscBTLookup(btvcand,i)) { 652 PetscBool found = PETSC_FALSE; 653 for (j=ii[i];j<ii[i+1] && !found;j++) { 654 PetscInt k,e = jj[j]; 655 if (PetscBTLookup(bte,e)) continue; 656 for (k=iit[e];k<iit[e+1];k++) { 657 PetscInt v = jjt[k]; 658 if (v != i && PetscBTLookup(btvcand,v)) { 659 found = PETSC_TRUE; 660 break; 661 } 662 } 663 } 664 if (!found) { 665 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 666 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 667 } else { 668 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 669 } 670 } 671 } 672 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 673 } 674 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 675 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 676 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 677 678 /* Get the local G^T explicitly */ 679 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 680 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 681 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 682 683 /* Mark interior nodal dofs */ 684 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 685 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 686 for (i=1;i<n_neigh;i++) { 687 for (j=0;j<n_shared[i];j++) { 688 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 689 } 690 } 691 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 692 693 /* communicate corners and splitpoints */ 694 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 695 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 696 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 697 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 698 699 if (print) { 700 IS tbz; 701 702 cum = 0; 703 for (i=0;i<nv;i++) 704 if (sfvleaves[i]) 705 vmarks[cum++] = i; 706 707 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 708 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 709 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 710 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 711 } 712 713 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 714 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 715 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 716 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 717 718 /* Zero rows of lGt corresponding to identified corners 719 and interior nodal dofs */ 720 cum = 0; 721 for (i=0;i<nv;i++) { 722 if (sfvleaves[i]) { 723 vmarks[cum++] = i; 724 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 725 } 726 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 727 } 728 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 729 if (print) { 730 IS tbz; 731 732 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 733 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 734 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 735 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 736 } 737 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 738 ierr = PetscFree(vmarks);CHKERRQ(ierr); 739 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 740 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 741 742 /* Recompute G */ 743 ierr = MatDestroy(&lG);CHKERRQ(ierr); 744 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 745 if (print) { 746 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 747 ierr = MatView(lG,NULL);CHKERRQ(ierr); 748 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 749 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 750 } 751 752 /* Get primal dofs (if any) */ 753 cum = 0; 754 for (i=0;i<ne;i++) { 755 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 756 } 757 if (fl2g) { 758 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 759 } 760 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 761 if (print) { 762 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 763 ierr = ISView(primals,NULL);CHKERRQ(ierr); 764 } 765 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 766 /* TODO: what if the user passed in some of them ? */ 767 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 768 ierr = ISDestroy(&primals);CHKERRQ(ierr); 769 770 /* Compute edge connectivity */ 771 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 772 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 773 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 774 if (fl2g) { 775 PetscBT btf; 776 PetscInt *iia,*jja,*iiu,*jju; 777 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 778 779 /* create CSR for all local dofs */ 780 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 781 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 782 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 783 iiu = pcbddc->mat_graph->xadj; 784 jju = pcbddc->mat_graph->adjncy; 785 } else if (pcbddc->use_local_adj) { 786 rest = PETSC_TRUE; 787 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 788 } else { 789 free = PETSC_TRUE; 790 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 791 iiu[0] = 0; 792 for (i=0;i<n;i++) { 793 iiu[i+1] = i+1; 794 jju[i] = -1; 795 } 796 } 797 798 /* import sizes of CSR */ 799 iia[0] = 0; 800 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 801 802 /* overwrite entries corresponding to the Nedelec field */ 803 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 804 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 805 for (i=0;i<ne;i++) { 806 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 807 iia[idxs[i]+1] = ii[i+1]-ii[i]; 808 } 809 810 /* iia in CSR */ 811 for (i=0;i<n;i++) iia[i+1] += iia[i]; 812 813 /* jja in CSR */ 814 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 815 for (i=0;i<n;i++) 816 if (!PetscBTLookup(btf,i)) 817 for (j=0;j<iiu[i+1]-iiu[i];j++) 818 jja[iia[i]+j] = jju[iiu[i]+j]; 819 820 /* map edge dofs connectivity */ 821 if (jj) { 822 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 823 for (i=0;i<ne;i++) { 824 PetscInt e = idxs[i]; 825 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 826 } 827 } 828 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 829 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 830 if (rest) { 831 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 832 } 833 if (free) { 834 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 835 } 836 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 837 } else { 838 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 839 } 840 841 /* Analyze interface for edge dofs */ 842 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 843 pcbddc->mat_graph->twodim = PETSC_FALSE; 844 845 /* Get coarse edges in the edge space */ 846 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 847 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 848 849 if (fl2g) { 850 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 851 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 852 for (i=0;i<nee;i++) { 853 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 854 } 855 } else { 856 eedges = alleedges; 857 primals = allprimals; 858 } 859 860 /* Mark fine edge dofs with their coarse edge id */ 861 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 862 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 863 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 864 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 865 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 866 if (print) { 867 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 868 ierr = ISView(primals,NULL);CHKERRQ(ierr); 869 } 870 871 maxsize = 0; 872 for (i=0;i<nee;i++) { 873 PetscInt size,mark = i+1; 874 875 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 876 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 877 for (j=0;j<size;j++) marks[idxs[j]] = mark; 878 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 879 maxsize = PetscMax(maxsize,size); 880 } 881 882 /* Find coarse edge endpoints */ 883 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 884 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 885 for (i=0;i<nee;i++) { 886 PetscInt mark = i+1,size; 887 888 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 889 if (!size && nedfieldlocal) continue; 890 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 891 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 892 if (print) { 893 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 894 ISView(eedges[i],NULL); 895 } 896 for (j=0;j<size;j++) { 897 PetscInt k, ee = idxs[j]; 898 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 899 for (k=ii[ee];k<ii[ee+1];k++) { 900 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 901 if (PetscBTLookup(btv,jj[k])) { 902 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 903 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 904 PetscInt k2; 905 PetscBool corner = PETSC_FALSE; 906 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 907 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 908 /* it's a corner if either is connected with an edge dof belonging to a different cc or 909 if the edge dof lie on the natural part of the boundary */ 910 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 911 corner = PETSC_TRUE; 912 break; 913 } 914 } 915 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 916 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 917 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 918 } else { 919 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 920 } 921 } 922 } 923 } 924 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 925 } 926 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 927 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 928 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 929 930 /* Reset marked primal dofs */ 931 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 932 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 933 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 934 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 935 936 /* Now use the initial lG */ 937 ierr = MatDestroy(&lG);CHKERRQ(ierr); 938 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 939 lG = lGinit; 940 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 941 942 /* Compute extended cols indices */ 943 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 944 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 945 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 946 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 947 i *= maxsize; 948 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 949 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 950 eerr = PETSC_FALSE; 951 for (i=0;i<nee;i++) { 952 PetscInt size,found = 0; 953 954 cum = 0; 955 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 956 if (!size && nedfieldlocal) continue; 957 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 958 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 959 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 960 for (j=0;j<size;j++) { 961 PetscInt k,ee = idxs[j]; 962 for (k=ii[ee];k<ii[ee+1];k++) { 963 PetscInt vv = jj[k]; 964 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 965 else if (!PetscBTLookupSet(btvc,vv)) found++; 966 } 967 } 968 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 969 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 970 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 971 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 972 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 973 /* it may happen that endpoints are not defined at this point 974 if it is the case, mark this edge for a second pass */ 975 if (cum != size -1 || found != 2) { 976 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 977 if (print) { 978 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 979 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 980 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 981 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 982 } 983 eerr = PETSC_TRUE; 984 } 985 } 986 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 987 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 988 if (done) { 989 PetscInt *newprimals; 990 991 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 992 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 993 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 994 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 995 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 996 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 997 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 998 for (i=0;i<nee;i++) { 999 PetscBool has_candidates = PETSC_FALSE; 1000 if (PetscBTLookup(bter,i)) { 1001 PetscInt size,mark = i+1; 1002 1003 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1004 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1005 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1006 for (j=0;j<size;j++) { 1007 PetscInt k,ee = idxs[j]; 1008 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 1009 for (k=ii[ee];k<ii[ee+1];k++) { 1010 /* set all candidates located on the edge as corners */ 1011 if (PetscBTLookup(btvcand,jj[k])) { 1012 PetscInt k2,vv = jj[k]; 1013 has_candidates = PETSC_TRUE; 1014 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 1015 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 1016 /* set all edge dofs connected to candidate as primals */ 1017 for (k2=iit[vv];k2<iit[vv+1];k2++) { 1018 if (marks[jjt[k2]] == mark) { 1019 PetscInt k3,ee2 = jjt[k2]; 1020 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 1021 newprimals[cum++] = ee2; 1022 /* finally set the new corners */ 1023 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 1024 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1025 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1026 } 1027 } 1028 } 1029 } else { 1030 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1031 } 1032 } 1033 } 1034 if (!has_candidates) { /* circular edge */ 1035 PetscInt k, ee = idxs[0],*tmarks; 1036 1037 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1038 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1039 for (k=ii[ee];k<ii[ee+1];k++) { 1040 PetscInt k2; 1041 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1042 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1043 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1044 } 1045 for (j=0;j<size;j++) { 1046 if (tmarks[idxs[j]] > 1) { 1047 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1048 newprimals[cum++] = idxs[j]; 1049 } 1050 } 1051 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1052 } 1053 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1054 } 1055 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1056 } 1057 ierr = PetscFree(extcols);CHKERRQ(ierr); 1058 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1059 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1060 if (fl2g) { 1061 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1062 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1063 for (i=0;i<nee;i++) { 1064 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1065 } 1066 ierr = PetscFree(eedges);CHKERRQ(ierr); 1067 } 1068 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1069 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1070 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1071 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1072 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1073 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1074 pcbddc->mat_graph->twodim = PETSC_FALSE; 1075 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1076 if (fl2g) { 1077 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1078 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1079 for (i=0;i<nee;i++) { 1080 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1081 } 1082 } else { 1083 eedges = alleedges; 1084 primals = allprimals; 1085 } 1086 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1087 1088 /* Mark again */ 1089 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1090 for (i=0;i<nee;i++) { 1091 PetscInt size,mark = i+1; 1092 1093 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1094 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1095 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1096 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1097 } 1098 if (print) { 1099 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1100 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1101 } 1102 1103 /* Recompute extended cols */ 1104 eerr = PETSC_FALSE; 1105 for (i=0;i<nee;i++) { 1106 PetscInt size; 1107 1108 cum = 0; 1109 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1110 if (!size && nedfieldlocal) continue; 1111 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1112 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1113 for (j=0;j<size;j++) { 1114 PetscInt k,ee = idxs[j]; 1115 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1116 } 1117 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1118 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1119 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1120 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1121 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1122 if (cum != size -1) { 1123 if (print) { 1124 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1125 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1126 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1127 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1128 } 1129 eerr = PETSC_TRUE; 1130 } 1131 } 1132 } 1133 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1134 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1135 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1136 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1137 /* an error should not occur at this point */ 1138 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1139 1140 /* Check the number of endpoints */ 1141 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1142 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1143 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1144 for (i=0;i<nee;i++) { 1145 PetscInt size, found = 0, gc[2]; 1146 1147 /* init with defaults */ 1148 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1149 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1150 if (!size && nedfieldlocal) continue; 1151 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1152 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1153 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1154 for (j=0;j<size;j++) { 1155 PetscInt k,ee = idxs[j]; 1156 for (k=ii[ee];k<ii[ee+1];k++) { 1157 PetscInt vv = jj[k]; 1158 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1159 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1160 corners[i*2+found++] = vv; 1161 } 1162 } 1163 } 1164 if (found != 2) { 1165 PetscInt e; 1166 if (fl2g) { 1167 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1168 } else { 1169 e = idxs[0]; 1170 } 1171 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1172 } 1173 1174 /* get primal dof index on this coarse edge */ 1175 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1176 if (gc[0] > gc[1]) { 1177 PetscInt swap = corners[2*i]; 1178 corners[2*i] = corners[2*i+1]; 1179 corners[2*i+1] = swap; 1180 } 1181 cedges[i] = idxs[size-1]; 1182 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1183 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1184 } 1185 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1186 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1187 1188 #if defined(PETSC_USE_DEBUG) 1189 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1190 not interfere with neighbouring coarse edges */ 1191 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1192 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1193 for (i=0;i<nv;i++) { 1194 PetscInt emax = 0,eemax = 0; 1195 1196 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1197 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1198 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1199 for (j=1;j<nee+1;j++) { 1200 if (emax < emarks[j]) { 1201 emax = emarks[j]; 1202 eemax = j; 1203 } 1204 } 1205 /* not relevant for edges */ 1206 if (!eemax) continue; 1207 1208 for (j=ii[i];j<ii[i+1];j++) { 1209 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1210 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1211 } 1212 } 1213 } 1214 ierr = PetscFree(emarks);CHKERRQ(ierr); 1215 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1216 #endif 1217 1218 /* Compute extended rows indices for edge blocks of the change of basis */ 1219 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1220 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1221 extmem *= maxsize; 1222 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1223 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1224 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1225 for (i=0;i<nv;i++) { 1226 PetscInt mark = 0,size,start; 1227 1228 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1229 for (j=ii[i];j<ii[i+1];j++) 1230 if (marks[jj[j]] && !mark) 1231 mark = marks[jj[j]]; 1232 1233 /* not relevant */ 1234 if (!mark) continue; 1235 1236 /* import extended row */ 1237 mark--; 1238 start = mark*extmem+extrowcum[mark]; 1239 size = ii[i+1]-ii[i]; 1240 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1241 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1242 extrowcum[mark] += size; 1243 } 1244 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1245 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1246 ierr = PetscFree(marks);CHKERRQ(ierr); 1247 1248 /* Compress extrows */ 1249 cum = 0; 1250 for (i=0;i<nee;i++) { 1251 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1252 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1253 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1254 cum = PetscMax(cum,size); 1255 } 1256 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1257 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1258 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1259 1260 /* Workspace for lapack inner calls and VecSetValues */ 1261 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1262 1263 /* Create change of basis matrix (preallocation can be improved) */ 1264 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1265 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1266 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1267 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1268 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1269 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1270 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1271 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1272 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1273 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1274 1275 /* Defaults to identity */ 1276 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1277 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1278 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1279 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1280 1281 /* Create discrete gradient for the coarser level if needed */ 1282 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1283 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1284 if (pcbddc->current_level < pcbddc->max_levels) { 1285 ISLocalToGlobalMapping cel2g,cvl2g; 1286 IS wis,gwis; 1287 PetscInt cnv,cne; 1288 1289 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1290 if (fl2g) { 1291 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1292 } else { 1293 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1294 pcbddc->nedclocal = wis; 1295 } 1296 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1297 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1298 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1299 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1300 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1301 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1302 1303 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1304 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1305 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1306 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1307 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1308 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1309 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1310 1311 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1312 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1313 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1314 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1315 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1316 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1317 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1318 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1319 } 1320 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1321 1322 #if defined(PRINT_GDET) 1323 inc = 0; 1324 lev = pcbddc->current_level; 1325 #endif 1326 1327 /* Insert values in the change of basis matrix */ 1328 for (i=0;i<nee;i++) { 1329 Mat Gins = NULL, GKins = NULL; 1330 IS cornersis = NULL; 1331 PetscScalar cvals[2]; 1332 1333 if (pcbddc->nedcG) { 1334 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1335 } 1336 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1337 if (Gins && GKins) { 1338 PetscScalar *data; 1339 const PetscInt *rows,*cols; 1340 PetscInt nrh,nch,nrc,ncc; 1341 1342 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1343 /* H1 */ 1344 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1345 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1346 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1347 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1348 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1349 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1350 /* complement */ 1351 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1352 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1353 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i); 1354 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc); 1355 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1356 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1357 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1358 1359 /* coarse discrete gradient */ 1360 if (pcbddc->nedcG) { 1361 PetscInt cols[2]; 1362 1363 cols[0] = 2*i; 1364 cols[1] = 2*i+1; 1365 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1366 } 1367 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1368 } 1369 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1370 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1371 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1372 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1373 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1374 } 1375 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1376 1377 /* Start assembling */ 1378 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1379 if (pcbddc->nedcG) { 1380 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1381 } 1382 1383 /* Free */ 1384 if (fl2g) { 1385 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1386 for (i=0;i<nee;i++) { 1387 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1388 } 1389 ierr = PetscFree(eedges);CHKERRQ(ierr); 1390 } 1391 1392 /* hack mat_graph with primal dofs on the coarse edges */ 1393 { 1394 PCBDDCGraph graph = pcbddc->mat_graph; 1395 PetscInt *oqueue = graph->queue; 1396 PetscInt *ocptr = graph->cptr; 1397 PetscInt ncc,*idxs; 1398 1399 /* find first primal edge */ 1400 if (pcbddc->nedclocal) { 1401 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1402 } else { 1403 if (fl2g) { 1404 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1405 } 1406 idxs = cedges; 1407 } 1408 cum = 0; 1409 while (cum < nee && cedges[cum] < 0) cum++; 1410 1411 /* adapt connected components */ 1412 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1413 graph->cptr[0] = 0; 1414 for (i=0,ncc=0;i<graph->ncc;i++) { 1415 PetscInt lc = ocptr[i+1]-ocptr[i]; 1416 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1417 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1418 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1419 ncc++; 1420 lc--; 1421 cum++; 1422 while (cum < nee && cedges[cum] < 0) cum++; 1423 } 1424 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1425 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1426 ncc++; 1427 } 1428 graph->ncc = ncc; 1429 if (pcbddc->nedclocal) { 1430 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1431 } 1432 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1433 } 1434 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1435 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1436 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1437 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1438 1439 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1440 ierr = PetscFree(extrow);CHKERRQ(ierr); 1441 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1442 ierr = PetscFree(corners);CHKERRQ(ierr); 1443 ierr = PetscFree(cedges);CHKERRQ(ierr); 1444 ierr = PetscFree(extrows);CHKERRQ(ierr); 1445 ierr = PetscFree(extcols);CHKERRQ(ierr); 1446 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1447 1448 /* Complete assembling */ 1449 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1450 if (pcbddc->nedcG) { 1451 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1452 #if 0 1453 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1454 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1455 #endif 1456 } 1457 1458 /* set change of basis */ 1459 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1460 ierr = MatDestroy(&T);CHKERRQ(ierr); 1461 1462 PetscFunctionReturn(0); 1463 } 1464 1465 /* the near-null space of BDDC carries information on quadrature weights, 1466 and these can be collinear -> so cheat with MatNullSpaceCreate 1467 and create a suitable set of basis vectors first */ 1468 #undef __FUNCT__ 1469 #define __FUNCT__ "PCBDDCNullSpaceCreate" 1470 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1471 { 1472 PetscErrorCode ierr; 1473 PetscInt i; 1474 1475 PetscFunctionBegin; 1476 for (i=0;i<nvecs;i++) { 1477 PetscInt first,last; 1478 1479 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1480 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1481 if (i>=first && i < last) { 1482 PetscScalar *data; 1483 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1484 if (!has_const) { 1485 data[i-first] = 1.; 1486 } else { 1487 data[2*i-first] = 1./PetscSqrtReal(2.); 1488 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1489 } 1490 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1491 } 1492 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1493 } 1494 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1495 for (i=0;i<nvecs;i++) { /* reset vectors */ 1496 PetscInt first,last; 1497 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1498 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1499 if (i>=first && i < last) { 1500 PetscScalar *data; 1501 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1502 if (!has_const) { 1503 data[i-first] = 0.; 1504 } else { 1505 data[2*i-first] = 0.; 1506 data[2*i-first+1] = 0.; 1507 } 1508 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1509 } 1510 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1511 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1512 } 1513 PetscFunctionReturn(0); 1514 } 1515 1516 #undef __FUNCT__ 1517 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 1518 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1519 { 1520 Mat loc_divudotp; 1521 Vec p,v,vins,quad_vec,*quad_vecs; 1522 ISLocalToGlobalMapping map; 1523 IS *faces,*edges; 1524 PetscScalar *vals; 1525 const PetscScalar *array; 1526 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1527 PetscMPIInt rank; 1528 PetscErrorCode ierr; 1529 1530 PetscFunctionBegin; 1531 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1532 if (graph->twodim) { 1533 lmaxneighs = 2; 1534 } else { 1535 lmaxneighs = 1; 1536 for (i=0;i<ne;i++) { 1537 const PetscInt *idxs; 1538 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1539 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1540 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1541 } 1542 lmaxneighs++; /* graph count does not include self */ 1543 } 1544 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1545 maxsize = 0; 1546 for (i=0;i<ne;i++) { 1547 PetscInt nn; 1548 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1549 maxsize = PetscMax(maxsize,nn); 1550 } 1551 for (i=0;i<nf;i++) { 1552 PetscInt nn; 1553 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1554 maxsize = PetscMax(maxsize,nn); 1555 } 1556 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1557 /* create vectors to hold quadrature weights */ 1558 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1559 if (!transpose) { 1560 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1561 } else { 1562 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1563 } 1564 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1565 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1566 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1567 for (i=0;i<maxneighs;i++) { 1568 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1569 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1570 } 1571 1572 /* compute local quad vec */ 1573 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1574 if (!transpose) { 1575 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1576 } else { 1577 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1578 } 1579 ierr = VecSet(p,1.);CHKERRQ(ierr); 1580 if (!transpose) { 1581 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1582 } else { 1583 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1584 } 1585 if (vl2l) { 1586 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1587 } else { 1588 vins = v; 1589 } 1590 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1591 ierr = VecDestroy(&p);CHKERRQ(ierr); 1592 1593 /* insert in global quadrature vecs */ 1594 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1595 for (i=0;i<nf;i++) { 1596 const PetscInt *idxs; 1597 PetscInt idx,nn,j; 1598 1599 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1600 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1601 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1602 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1603 idx = -(idx+1); 1604 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1605 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1606 } 1607 for (i=0;i<ne;i++) { 1608 const PetscInt *idxs; 1609 PetscInt idx,nn,j; 1610 1611 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1612 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1613 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1614 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1615 idx = -(idx+1); 1616 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1617 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1618 } 1619 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1620 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1621 if (vl2l) { 1622 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1623 } 1624 ierr = VecDestroy(&v);CHKERRQ(ierr); 1625 ierr = PetscFree(vals);CHKERRQ(ierr); 1626 1627 /* assemble near null space */ 1628 for (i=0;i<maxneighs;i++) { 1629 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1630 } 1631 for (i=0;i<maxneighs;i++) { 1632 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1633 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1634 } 1635 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1636 PetscFunctionReturn(0); 1637 } 1638 1639 1640 #undef __FUNCT__ 1641 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 1642 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1643 { 1644 PetscErrorCode ierr; 1645 Vec local,global; 1646 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1647 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1648 1649 PetscFunctionBegin; 1650 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1651 /* need to convert from global to local topology information and remove references to information in global ordering */ 1652 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1653 if (pcbddc->user_provided_isfordofs) { 1654 if (pcbddc->n_ISForDofs) { 1655 PetscInt i; 1656 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1657 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1658 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1659 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1660 } 1661 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1662 pcbddc->n_ISForDofs = 0; 1663 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1664 } 1665 } else { 1666 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1667 PetscInt i, n = matis->A->rmap->n; 1668 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1669 if (i > 1) { 1670 pcbddc->n_ISForDofsLocal = i; 1671 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1672 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1673 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1674 } 1675 } 1676 } else { 1677 PetscInt i; 1678 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1679 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1680 } 1681 } 1682 } 1683 1684 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1685 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1686 } else if (pcbddc->DirichletBoundariesLocal) { 1687 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1688 } 1689 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1690 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1691 } else if (pcbddc->NeumannBoundariesLocal) { 1692 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1693 } 1694 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1695 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1696 } 1697 ierr = VecDestroy(&global);CHKERRQ(ierr); 1698 ierr = VecDestroy(&local);CHKERRQ(ierr); 1699 1700 PetscFunctionReturn(0); 1701 } 1702 1703 #undef __FUNCT__ 1704 #define __FUNCT__ "PCBDDCConsistencyCheckIS" 1705 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1706 { 1707 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1708 PetscErrorCode ierr; 1709 IS nis; 1710 const PetscInt *idxs; 1711 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1712 PetscBool *ld; 1713 1714 PetscFunctionBegin; 1715 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1716 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 1717 if (mop == MPI_LAND) { 1718 /* init rootdata with true */ 1719 ld = (PetscBool*) matis->sf_rootdata; 1720 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1721 } else { 1722 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1723 } 1724 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1725 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1726 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1727 ld = (PetscBool*) matis->sf_leafdata; 1728 for (i=0;i<nd;i++) 1729 if (-1 < idxs[i] && idxs[i] < n) 1730 ld[idxs[i]] = PETSC_TRUE; 1731 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1732 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1733 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1734 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1735 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1736 if (mop == MPI_LAND) { 1737 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1738 } else { 1739 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1740 } 1741 for (i=0,nnd=0;i<n;i++) 1742 if (ld[i]) 1743 nidxs[nnd++] = i; 1744 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1745 ierr = ISDestroy(is);CHKERRQ(ierr); 1746 *is = nis; 1747 PetscFunctionReturn(0); 1748 } 1749 1750 #undef __FUNCT__ 1751 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 1752 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1753 { 1754 PC_IS *pcis = (PC_IS*)(pc->data); 1755 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1756 PetscErrorCode ierr; 1757 1758 PetscFunctionBegin; 1759 if (!pcbddc->benign_have_null) { 1760 PetscFunctionReturn(0); 1761 } 1762 if (pcbddc->ChangeOfBasisMatrix) { 1763 Vec swap; 1764 1765 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1766 swap = pcbddc->work_change; 1767 pcbddc->work_change = r; 1768 r = swap; 1769 } 1770 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1771 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1772 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1773 ierr = VecSet(z,0.);CHKERRQ(ierr); 1774 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1775 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1776 if (pcbddc->ChangeOfBasisMatrix) { 1777 pcbddc->work_change = r; 1778 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1779 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1780 } 1781 PetscFunctionReturn(0); 1782 } 1783 1784 #undef __FUNCT__ 1785 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 1786 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1787 { 1788 PCBDDCBenignMatMult_ctx ctx; 1789 PetscErrorCode ierr; 1790 PetscBool apply_right,apply_left,reset_x; 1791 1792 PetscFunctionBegin; 1793 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1794 if (transpose) { 1795 apply_right = ctx->apply_left; 1796 apply_left = ctx->apply_right; 1797 } else { 1798 apply_right = ctx->apply_right; 1799 apply_left = ctx->apply_left; 1800 } 1801 reset_x = PETSC_FALSE; 1802 if (apply_right) { 1803 const PetscScalar *ax; 1804 PetscInt nl,i; 1805 1806 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1807 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1808 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1809 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1810 for (i=0;i<ctx->benign_n;i++) { 1811 PetscScalar sum,val; 1812 const PetscInt *idxs; 1813 PetscInt nz,j; 1814 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1815 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1816 sum = 0.; 1817 if (ctx->apply_p0) { 1818 val = ctx->work[idxs[nz-1]]; 1819 for (j=0;j<nz-1;j++) { 1820 sum += ctx->work[idxs[j]]; 1821 ctx->work[idxs[j]] += val; 1822 } 1823 } else { 1824 for (j=0;j<nz-1;j++) { 1825 sum += ctx->work[idxs[j]]; 1826 } 1827 } 1828 ctx->work[idxs[nz-1]] -= sum; 1829 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1830 } 1831 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1832 reset_x = PETSC_TRUE; 1833 } 1834 if (transpose) { 1835 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1836 } else { 1837 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1838 } 1839 if (reset_x) { 1840 ierr = VecResetArray(x);CHKERRQ(ierr); 1841 } 1842 if (apply_left) { 1843 PetscScalar *ay; 1844 PetscInt i; 1845 1846 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1847 for (i=0;i<ctx->benign_n;i++) { 1848 PetscScalar sum,val; 1849 const PetscInt *idxs; 1850 PetscInt nz,j; 1851 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1852 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1853 val = -ay[idxs[nz-1]]; 1854 if (ctx->apply_p0) { 1855 sum = 0.; 1856 for (j=0;j<nz-1;j++) { 1857 sum += ay[idxs[j]]; 1858 ay[idxs[j]] += val; 1859 } 1860 ay[idxs[nz-1]] += sum; 1861 } else { 1862 for (j=0;j<nz-1;j++) { 1863 ay[idxs[j]] += val; 1864 } 1865 ay[idxs[nz-1]] = 0.; 1866 } 1867 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1868 } 1869 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1870 } 1871 PetscFunctionReturn(0); 1872 } 1873 1874 #undef __FUNCT__ 1875 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 1876 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1877 { 1878 PetscErrorCode ierr; 1879 1880 PetscFunctionBegin; 1881 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1882 PetscFunctionReturn(0); 1883 } 1884 1885 #undef __FUNCT__ 1886 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 1887 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1888 { 1889 PetscErrorCode ierr; 1890 1891 PetscFunctionBegin; 1892 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1893 PetscFunctionReturn(0); 1894 } 1895 1896 #undef __FUNCT__ 1897 #define __FUNCT__ "PCBDDCBenignShellMat" 1898 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1899 { 1900 PC_IS *pcis = (PC_IS*)pc->data; 1901 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1902 PCBDDCBenignMatMult_ctx ctx; 1903 PetscErrorCode ierr; 1904 1905 PetscFunctionBegin; 1906 if (!restore) { 1907 Mat A_IB,A_BI; 1908 PetscScalar *work; 1909 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1910 1911 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1912 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1913 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1914 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1915 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1916 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1917 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1918 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1919 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1920 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1921 ctx->apply_left = PETSC_TRUE; 1922 ctx->apply_right = PETSC_FALSE; 1923 ctx->apply_p0 = PETSC_FALSE; 1924 ctx->benign_n = pcbddc->benign_n; 1925 if (reuse) { 1926 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1927 ctx->free = PETSC_FALSE; 1928 } else { /* TODO: could be optimized for successive solves */ 1929 ISLocalToGlobalMapping N_to_D; 1930 PetscInt i; 1931 1932 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1933 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1934 for (i=0;i<pcbddc->benign_n;i++) { 1935 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1936 } 1937 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1938 ctx->free = PETSC_TRUE; 1939 } 1940 ctx->A = pcis->A_IB; 1941 ctx->work = work; 1942 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1943 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1944 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1945 pcis->A_IB = A_IB; 1946 1947 /* A_BI as A_IB^T */ 1948 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1949 pcbddc->benign_original_mat = pcis->A_BI; 1950 pcis->A_BI = A_BI; 1951 } else { 1952 if (!pcbddc->benign_original_mat) { 1953 PetscFunctionReturn(0); 1954 } 1955 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1956 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1957 pcis->A_IB = ctx->A; 1958 ctx->A = NULL; 1959 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1960 pcis->A_BI = pcbddc->benign_original_mat; 1961 pcbddc->benign_original_mat = NULL; 1962 if (ctx->free) { 1963 PetscInt i; 1964 for (i=0;i<ctx->benign_n;i++) { 1965 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1966 } 1967 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1968 } 1969 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1970 ierr = PetscFree(ctx);CHKERRQ(ierr); 1971 } 1972 PetscFunctionReturn(0); 1973 } 1974 1975 /* used just in bddc debug mode */ 1976 #undef __FUNCT__ 1977 #define __FUNCT__ "PCBDDCBenignProject" 1978 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1979 { 1980 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1981 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1982 Mat An; 1983 PetscErrorCode ierr; 1984 1985 PetscFunctionBegin; 1986 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1987 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1988 if (is1) { 1989 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1990 ierr = MatDestroy(&An);CHKERRQ(ierr); 1991 } else { 1992 *B = An; 1993 } 1994 PetscFunctionReturn(0); 1995 } 1996 1997 /* TODO: add reuse flag */ 1998 #undef __FUNCT__ 1999 #define __FUNCT__ "MatSeqAIJCompress" 2000 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2001 { 2002 Mat Bt; 2003 PetscScalar *a,*bdata; 2004 const PetscInt *ii,*ij; 2005 PetscInt m,n,i,nnz,*bii,*bij; 2006 PetscBool flg_row; 2007 PetscErrorCode ierr; 2008 2009 PetscFunctionBegin; 2010 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2011 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2012 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2013 nnz = n; 2014 for (i=0;i<ii[n];i++) { 2015 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2016 } 2017 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2018 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2019 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2020 nnz = 0; 2021 bii[0] = 0; 2022 for (i=0;i<n;i++) { 2023 PetscInt j; 2024 for (j=ii[i];j<ii[i+1];j++) { 2025 PetscScalar entry = a[j]; 2026 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 2027 bij[nnz] = ij[j]; 2028 bdata[nnz] = entry; 2029 nnz++; 2030 } 2031 } 2032 bii[i+1] = nnz; 2033 } 2034 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2035 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2036 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2037 { 2038 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2039 b->free_a = PETSC_TRUE; 2040 b->free_ij = PETSC_TRUE; 2041 } 2042 *B = Bt; 2043 PetscFunctionReturn(0); 2044 } 2045 2046 #undef __FUNCT__ 2047 #define __FUNCT__ "MatDetectDisconnectedComponents" 2048 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 2049 { 2050 Mat B; 2051 IS is_dummy,*cc_n; 2052 ISLocalToGlobalMapping l2gmap_dummy; 2053 PCBDDCGraph graph; 2054 PetscInt i,n; 2055 PetscInt *xadj,*adjncy; 2056 PetscInt *xadj_filtered,*adjncy_filtered; 2057 PetscBool flg_row,isseqaij; 2058 PetscErrorCode ierr; 2059 2060 PetscFunctionBegin; 2061 if (!A->rmap->N || !A->cmap->N) { 2062 *ncc = 0; 2063 *cc = NULL; 2064 PetscFunctionReturn(0); 2065 } 2066 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2067 if (!isseqaij && filter) { 2068 PetscBool isseqdense; 2069 2070 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2071 if (!isseqdense) { 2072 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2073 } else { /* TODO: rectangular case and LDA */ 2074 PetscScalar *array; 2075 PetscReal chop=1.e-6; 2076 2077 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2078 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2079 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2080 for (i=0;i<n;i++) { 2081 PetscInt j; 2082 for (j=i+1;j<n;j++) { 2083 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2084 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2085 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2086 } 2087 } 2088 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2089 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2090 } 2091 } else { 2092 B = A; 2093 } 2094 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2095 2096 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2097 if (filter) { 2098 PetscScalar *data; 2099 PetscInt j,cum; 2100 2101 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2102 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2103 cum = 0; 2104 for (i=0;i<n;i++) { 2105 PetscInt t; 2106 2107 for (j=xadj[i];j<xadj[i+1];j++) { 2108 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2109 continue; 2110 } 2111 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2112 } 2113 t = xadj_filtered[i]; 2114 xadj_filtered[i] = cum; 2115 cum += t; 2116 } 2117 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2118 } else { 2119 xadj_filtered = NULL; 2120 adjncy_filtered = NULL; 2121 } 2122 2123 /* compute local connected components using PCBDDCGraph */ 2124 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2125 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2126 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2127 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2128 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2129 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2130 if (xadj_filtered) { 2131 graph->xadj = xadj_filtered; 2132 graph->adjncy = adjncy_filtered; 2133 } else { 2134 graph->xadj = xadj; 2135 graph->adjncy = adjncy; 2136 } 2137 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2138 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2139 /* partial clean up */ 2140 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2141 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2142 if (A != B) { 2143 ierr = MatDestroy(&B);CHKERRQ(ierr); 2144 } 2145 2146 /* get back data */ 2147 if (ncc) *ncc = graph->ncc; 2148 if (cc) { 2149 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2150 for (i=0;i<graph->ncc;i++) { 2151 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2152 } 2153 *cc = cc_n; 2154 } 2155 /* clean up graph */ 2156 graph->xadj = 0; 2157 graph->adjncy = 0; 2158 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2159 PetscFunctionReturn(0); 2160 } 2161 2162 #undef __FUNCT__ 2163 #define __FUNCT__ "PCBDDCBenignCheck" 2164 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2165 { 2166 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2167 PC_IS* pcis = (PC_IS*)(pc->data); 2168 IS dirIS = NULL; 2169 PetscInt i; 2170 PetscErrorCode ierr; 2171 2172 PetscFunctionBegin; 2173 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2174 if (zerodiag) { 2175 Mat A; 2176 Vec vec3_N; 2177 PetscScalar *vals; 2178 const PetscInt *idxs; 2179 PetscInt nz,*count; 2180 2181 /* p0 */ 2182 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2183 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2184 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2185 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2186 for (i=0;i<nz;i++) vals[i] = 1.; 2187 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2188 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2189 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2190 /* v_I */ 2191 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2192 for (i=0;i<nz;i++) vals[i] = 0.; 2193 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2194 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2195 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2196 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2197 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2198 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2199 if (dirIS) { 2200 PetscInt n; 2201 2202 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2203 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2204 for (i=0;i<n;i++) vals[i] = 0.; 2205 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2206 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2207 } 2208 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2209 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2210 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2211 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2212 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2213 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2214 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2215 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2216 ierr = PetscFree(vals);CHKERRQ(ierr); 2217 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2218 2219 /* there should not be any pressure dofs lying on the interface */ 2220 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2221 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2222 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2223 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2224 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2225 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2226 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2227 ierr = PetscFree(count);CHKERRQ(ierr); 2228 } 2229 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2230 2231 /* check PCBDDCBenignGetOrSetP0 */ 2232 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2233 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2234 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2235 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2236 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2237 for (i=0;i<pcbddc->benign_n;i++) { 2238 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2239 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 2240 } 2241 PetscFunctionReturn(0); 2242 } 2243 2244 #undef __FUNCT__ 2245 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 2246 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2247 { 2248 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2249 IS pressures,zerodiag,zerodiag_save,*zerodiag_subs; 2250 PetscInt nz,n; 2251 PetscInt *interior_dofs,n_interior_dofs,nneu; 2252 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2253 PetscErrorCode ierr; 2254 2255 PetscFunctionBegin; 2256 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2257 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2258 for (n=0;n<pcbddc->benign_n;n++) { 2259 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2260 } 2261 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2262 pcbddc->benign_n = 0; 2263 /* if a local info on dofs is present, assumes that the last field represents "pressures" 2264 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2265 Checks if all the pressure dofs in each subdomain have a zero diagonal 2266 If not, a change of basis on pressures is not needed 2267 since the local Schur complements are already SPD 2268 */ 2269 has_null_pressures = PETSC_TRUE; 2270 have_null = PETSC_TRUE; 2271 if (pcbddc->n_ISForDofsLocal) { 2272 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2273 2274 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2275 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2276 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2277 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2278 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2279 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2280 if (!sorted) { 2281 ierr = ISSort(pressures);CHKERRQ(ierr); 2282 } 2283 } else { 2284 pressures = NULL; 2285 } 2286 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2287 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2288 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2289 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2290 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2291 if (!sorted) { 2292 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2293 } 2294 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2295 zerodiag_save = zerodiag; 2296 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2297 if (!nz) { 2298 if (n) have_null = PETSC_FALSE; 2299 has_null_pressures = PETSC_FALSE; 2300 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2301 } 2302 recompute_zerodiag = PETSC_FALSE; 2303 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2304 zerodiag_subs = NULL; 2305 pcbddc->benign_n = 0; 2306 n_interior_dofs = 0; 2307 interior_dofs = NULL; 2308 nneu = 0; 2309 if (pcbddc->NeumannBoundariesLocal) { 2310 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2311 } 2312 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2313 if (checkb) { /* need to compute interior nodes */ 2314 PetscInt n,i,j; 2315 PetscInt n_neigh,*neigh,*n_shared,**shared; 2316 PetscInt *iwork; 2317 2318 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2319 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2320 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2321 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2322 for (i=1;i<n_neigh;i++) 2323 for (j=0;j<n_shared[i];j++) 2324 iwork[shared[i][j]] += 1; 2325 for (i=0;i<n;i++) 2326 if (!iwork[i]) 2327 interior_dofs[n_interior_dofs++] = i; 2328 ierr = PetscFree(iwork);CHKERRQ(ierr); 2329 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2330 } 2331 if (has_null_pressures) { 2332 IS *subs; 2333 PetscInt nsubs,i,j,nl; 2334 const PetscInt *idxs; 2335 PetscScalar *array; 2336 Vec *work; 2337 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2338 2339 subs = pcbddc->local_subs; 2340 nsubs = pcbddc->n_local_subs; 2341 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2342 if (checkb) { 2343 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2344 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2345 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2346 /* work[0] = 1_p */ 2347 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2348 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2349 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2350 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2351 /* work[0] = 1_v */ 2352 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2353 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2354 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2355 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2356 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2357 } 2358 if (nsubs > 1) { 2359 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2360 for (i=0;i<nsubs;i++) { 2361 ISLocalToGlobalMapping l2g; 2362 IS t_zerodiag_subs; 2363 PetscInt nl; 2364 2365 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2366 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2367 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2368 if (nl) { 2369 PetscBool valid = PETSC_TRUE; 2370 2371 if (checkb) { 2372 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2373 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2374 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2375 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2376 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2377 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2378 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2379 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2380 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2381 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2382 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2383 for (j=0;j<n_interior_dofs;j++) { 2384 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2385 valid = PETSC_FALSE; 2386 break; 2387 } 2388 } 2389 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2390 } 2391 if (valid && nneu) { 2392 const PetscInt *idxs; 2393 PetscInt nzb; 2394 2395 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2396 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2397 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2398 if (nzb) valid = PETSC_FALSE; 2399 } 2400 if (valid && pressures) { 2401 IS t_pressure_subs; 2402 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2403 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2404 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2405 } 2406 if (valid) { 2407 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2408 pcbddc->benign_n++; 2409 } else { 2410 recompute_zerodiag = PETSC_TRUE; 2411 } 2412 } 2413 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2414 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2415 } 2416 } else { /* there's just one subdomain (or zero if they have not been detected */ 2417 PetscBool valid = PETSC_TRUE; 2418 2419 if (nneu) valid = PETSC_FALSE; 2420 if (valid && pressures) { 2421 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2422 } 2423 if (valid && checkb) { 2424 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2425 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2426 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2427 for (j=0;j<n_interior_dofs;j++) { 2428 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2429 valid = PETSC_FALSE; 2430 break; 2431 } 2432 } 2433 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2434 } 2435 if (valid) { 2436 pcbddc->benign_n = 1; 2437 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2438 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2439 zerodiag_subs[0] = zerodiag; 2440 } 2441 } 2442 if (checkb) { 2443 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2444 } 2445 } 2446 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2447 2448 if (!pcbddc->benign_n) { 2449 PetscInt n; 2450 2451 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2452 recompute_zerodiag = PETSC_FALSE; 2453 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2454 if (n) { 2455 has_null_pressures = PETSC_FALSE; 2456 have_null = PETSC_FALSE; 2457 } 2458 } 2459 2460 /* final check for null pressures */ 2461 if (zerodiag && pressures) { 2462 PetscInt nz,np; 2463 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2464 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2465 if (nz != np) have_null = PETSC_FALSE; 2466 } 2467 2468 if (recompute_zerodiag) { 2469 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2470 if (pcbddc->benign_n == 1) { 2471 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2472 zerodiag = zerodiag_subs[0]; 2473 } else { 2474 PetscInt i,nzn,*new_idxs; 2475 2476 nzn = 0; 2477 for (i=0;i<pcbddc->benign_n;i++) { 2478 PetscInt ns; 2479 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2480 nzn += ns; 2481 } 2482 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2483 nzn = 0; 2484 for (i=0;i<pcbddc->benign_n;i++) { 2485 PetscInt ns,*idxs; 2486 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2487 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2488 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2489 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2490 nzn += ns; 2491 } 2492 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2493 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2494 } 2495 have_null = PETSC_FALSE; 2496 } 2497 2498 /* Prepare matrix to compute no-net-flux */ 2499 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2500 Mat A,loc_divudotp; 2501 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2502 IS row,col,isused = NULL; 2503 PetscInt M,N,n,st,n_isused; 2504 2505 if (pressures) { 2506 isused = pressures; 2507 } else { 2508 isused = zerodiag_save; 2509 } 2510 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2511 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2512 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2513 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2514 n_isused = 0; 2515 if (isused) { 2516 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2517 } 2518 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2519 st = st-n_isused; 2520 if (n) { 2521 const PetscInt *gidxs; 2522 2523 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2524 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2525 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2526 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2527 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2528 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2529 } else { 2530 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2531 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2532 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2533 } 2534 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2535 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2536 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2537 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2538 ierr = ISDestroy(&row);CHKERRQ(ierr); 2539 ierr = ISDestroy(&col);CHKERRQ(ierr); 2540 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2541 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2542 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2543 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2544 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2545 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2546 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2547 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2548 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2549 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2550 } 2551 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2552 2553 /* change of basis and p0 dofs */ 2554 if (has_null_pressures) { 2555 IS zerodiagc; 2556 const PetscInt *idxs,*idxsc; 2557 PetscInt i,s,*nnz; 2558 2559 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2560 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2561 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2562 /* local change of basis for pressures */ 2563 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2564 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2565 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2566 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2567 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2568 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2569 for (i=0;i<pcbddc->benign_n;i++) { 2570 PetscInt nzs,j; 2571 2572 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2573 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2574 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2575 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2576 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2577 } 2578 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2579 ierr = PetscFree(nnz);CHKERRQ(ierr); 2580 /* set identity on velocities */ 2581 for (i=0;i<n-nz;i++) { 2582 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2583 } 2584 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2585 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2586 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2587 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2588 /* set change on pressures */ 2589 for (s=0;s<pcbddc->benign_n;s++) { 2590 PetscScalar *array; 2591 PetscInt nzs; 2592 2593 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2594 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2595 for (i=0;i<nzs-1;i++) { 2596 PetscScalar vals[2]; 2597 PetscInt cols[2]; 2598 2599 cols[0] = idxs[i]; 2600 cols[1] = idxs[nzs-1]; 2601 vals[0] = 1.; 2602 vals[1] = 1.; 2603 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2604 } 2605 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2606 for (i=0;i<nzs-1;i++) array[i] = -1.; 2607 array[nzs-1] = 1.; 2608 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2609 /* store local idxs for p0 */ 2610 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2611 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2612 ierr = PetscFree(array);CHKERRQ(ierr); 2613 } 2614 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2615 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2616 /* project if needed */ 2617 if (pcbddc->benign_change_explicit) { 2618 Mat M; 2619 2620 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2621 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2622 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2623 ierr = MatDestroy(&M);CHKERRQ(ierr); 2624 } 2625 /* store global idxs for p0 */ 2626 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2627 } 2628 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2629 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2630 2631 /* determines if the coarse solver will be singular or not */ 2632 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2633 /* determines if the problem has subdomains with 0 pressure block */ 2634 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2635 *zerodiaglocal = zerodiag; 2636 PetscFunctionReturn(0); 2637 } 2638 2639 #undef __FUNCT__ 2640 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 2641 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2642 { 2643 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2644 PetscScalar *array; 2645 PetscErrorCode ierr; 2646 2647 PetscFunctionBegin; 2648 if (!pcbddc->benign_sf) { 2649 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2650 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2651 } 2652 if (get) { 2653 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2654 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2655 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2656 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2657 } else { 2658 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2659 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2660 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2661 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2662 } 2663 PetscFunctionReturn(0); 2664 } 2665 2666 #undef __FUNCT__ 2667 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 2668 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2669 { 2670 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2671 PetscErrorCode ierr; 2672 2673 PetscFunctionBegin; 2674 /* TODO: add error checking 2675 - avoid nested pop (or push) calls. 2676 - cannot push before pop. 2677 - cannot call this if pcbddc->local_mat is NULL 2678 */ 2679 if (!pcbddc->benign_n) { 2680 PetscFunctionReturn(0); 2681 } 2682 if (pop) { 2683 if (pcbddc->benign_change_explicit) { 2684 IS is_p0; 2685 MatReuse reuse; 2686 2687 /* extract B_0 */ 2688 reuse = MAT_INITIAL_MATRIX; 2689 if (pcbddc->benign_B0) { 2690 reuse = MAT_REUSE_MATRIX; 2691 } 2692 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2693 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2694 /* remove rows and cols from local problem */ 2695 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2696 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2697 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2698 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2699 } else { 2700 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2701 PetscScalar *vals; 2702 PetscInt i,n,*idxs_ins; 2703 2704 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2705 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2706 if (!pcbddc->benign_B0) { 2707 PetscInt *nnz; 2708 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2709 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2710 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2711 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2712 for (i=0;i<pcbddc->benign_n;i++) { 2713 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2714 nnz[i] = n - nnz[i]; 2715 } 2716 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2717 ierr = PetscFree(nnz);CHKERRQ(ierr); 2718 } 2719 2720 for (i=0;i<pcbddc->benign_n;i++) { 2721 PetscScalar *array; 2722 PetscInt *idxs,j,nz,cum; 2723 2724 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2725 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2726 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2727 for (j=0;j<nz;j++) vals[j] = 1.; 2728 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2729 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2730 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2731 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2732 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2733 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2734 cum = 0; 2735 for (j=0;j<n;j++) { 2736 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2737 vals[cum] = array[j]; 2738 idxs_ins[cum] = j; 2739 cum++; 2740 } 2741 } 2742 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2743 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2744 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2745 } 2746 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2747 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2748 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2749 } 2750 } else { /* push */ 2751 if (pcbddc->benign_change_explicit) { 2752 PetscInt i; 2753 2754 for (i=0;i<pcbddc->benign_n;i++) { 2755 PetscScalar *B0_vals; 2756 PetscInt *B0_cols,B0_ncol; 2757 2758 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2759 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2760 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2761 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2762 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2763 } 2764 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2765 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2766 } else { 2767 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2768 } 2769 } 2770 PetscFunctionReturn(0); 2771 } 2772 2773 #undef __FUNCT__ 2774 #define __FUNCT__ "PCBDDCAdaptiveSelection" 2775 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2776 { 2777 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2778 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2779 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2780 PetscBLASInt *B_iwork,*B_ifail; 2781 PetscScalar *work,lwork; 2782 PetscScalar *St,*S,*eigv; 2783 PetscScalar *Sarray,*Starray; 2784 PetscReal *eigs,thresh; 2785 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2786 PetscBool allocated_S_St; 2787 #if defined(PETSC_USE_COMPLEX) 2788 PetscReal *rwork; 2789 #endif 2790 PetscErrorCode ierr; 2791 2792 PetscFunctionBegin; 2793 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2794 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2795 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2796 2797 if (pcbddc->dbg_flag) { 2798 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2799 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2800 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2801 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2802 } 2803 2804 if (pcbddc->dbg_flag) { 2805 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2806 } 2807 2808 /* max size of subsets */ 2809 mss = 0; 2810 for (i=0;i<sub_schurs->n_subs;i++) { 2811 PetscInt subset_size; 2812 2813 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2814 mss = PetscMax(mss,subset_size); 2815 } 2816 2817 /* min/max and threshold */ 2818 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2819 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2820 nmax = PetscMax(nmin,nmax); 2821 allocated_S_St = PETSC_FALSE; 2822 if (nmin) { 2823 allocated_S_St = PETSC_TRUE; 2824 } 2825 2826 /* allocate lapack workspace */ 2827 cum = cum2 = 0; 2828 maxneigs = 0; 2829 for (i=0;i<sub_schurs->n_subs;i++) { 2830 PetscInt n,subset_size; 2831 2832 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2833 n = PetscMin(subset_size,nmax); 2834 cum += subset_size; 2835 cum2 += subset_size*n; 2836 maxneigs = PetscMax(maxneigs,n); 2837 } 2838 if (mss) { 2839 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2840 PetscBLASInt B_itype = 1; 2841 PetscBLASInt B_N = mss; 2842 PetscReal zero = 0.0; 2843 PetscReal eps = 0.0; /* dlamch? */ 2844 2845 B_lwork = -1; 2846 S = NULL; 2847 St = NULL; 2848 eigs = NULL; 2849 eigv = NULL; 2850 B_iwork = NULL; 2851 B_ifail = NULL; 2852 #if defined(PETSC_USE_COMPLEX) 2853 rwork = NULL; 2854 #endif 2855 thresh = 1.0; 2856 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2857 #if defined(PETSC_USE_COMPLEX) 2858 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2859 #else 2860 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2861 #endif 2862 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2863 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2864 } else { 2865 /* TODO */ 2866 } 2867 } else { 2868 lwork = 0; 2869 } 2870 2871 nv = 0; 2872 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2873 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2874 } 2875 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2876 if (allocated_S_St) { 2877 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2878 } 2879 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2880 #if defined(PETSC_USE_COMPLEX) 2881 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2882 #endif 2883 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2884 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2885 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2886 nv+cum,&pcbddc->adaptive_constraints_idxs, 2887 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2888 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2889 2890 maxneigs = 0; 2891 cum = cumarray = 0; 2892 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2893 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2894 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2895 const PetscInt *idxs; 2896 2897 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2898 for (cum=0;cum<nv;cum++) { 2899 pcbddc->adaptive_constraints_n[cum] = 1; 2900 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2901 pcbddc->adaptive_constraints_data[cum] = 1.0; 2902 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2903 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2904 } 2905 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2906 } 2907 2908 if (mss) { /* multilevel */ 2909 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2910 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2911 } 2912 2913 thresh = pcbddc->adaptive_threshold; 2914 for (i=0;i<sub_schurs->n_subs;i++) { 2915 const PetscInt *idxs; 2916 PetscReal upper,lower; 2917 PetscInt j,subset_size,eigs_start = 0; 2918 PetscBLASInt B_N; 2919 PetscBool same_data = PETSC_FALSE; 2920 2921 if (pcbddc->use_deluxe_scaling) { 2922 upper = PETSC_MAX_REAL; 2923 lower = thresh; 2924 } else { 2925 upper = 1./thresh; 2926 lower = 0.; 2927 } 2928 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2929 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2930 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2931 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2932 if (sub_schurs->is_hermitian) { 2933 PetscInt j,k; 2934 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2935 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2936 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2937 } 2938 for (j=0;j<subset_size;j++) { 2939 for (k=j;k<subset_size;k++) { 2940 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2941 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2942 } 2943 } 2944 } else { 2945 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2946 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2947 } 2948 } else { 2949 S = Sarray + cumarray; 2950 St = Starray + cumarray; 2951 } 2952 /* see if we can save some work */ 2953 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2954 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2955 } 2956 2957 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2958 B_neigs = 0; 2959 } else { 2960 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2961 PetscBLASInt B_itype = 1; 2962 PetscBLASInt B_IL, B_IU; 2963 PetscReal eps = -1.0; /* dlamch? */ 2964 PetscInt nmin_s; 2965 PetscBool compute_range = PETSC_FALSE; 2966 2967 if (pcbddc->dbg_flag) { 2968 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]]); 2969 } 2970 2971 compute_range = PETSC_FALSE; 2972 if (thresh > 1.+PETSC_SMALL && !same_data) { 2973 compute_range = PETSC_TRUE; 2974 } 2975 2976 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2977 if (compute_range) { 2978 2979 /* ask for eigenvalues larger than thresh */ 2980 #if defined(PETSC_USE_COMPLEX) 2981 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2982 #else 2983 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2984 #endif 2985 } else if (!same_data) { 2986 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2987 B_IL = 1; 2988 #if defined(PETSC_USE_COMPLEX) 2989 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2990 #else 2991 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2992 #endif 2993 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 2994 PetscInt k; 2995 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2996 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2997 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2998 nmin = nmax; 2999 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 3000 for (k=0;k<nmax;k++) { 3001 eigs[k] = 1./PETSC_SMALL; 3002 eigv[k*(subset_size+1)] = 1.0; 3003 } 3004 } 3005 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3006 if (B_ierr) { 3007 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3008 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3009 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3010 } 3011 3012 if (B_neigs > nmax) { 3013 if (pcbddc->dbg_flag) { 3014 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 3015 } 3016 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 3017 B_neigs = nmax; 3018 } 3019 3020 nmin_s = PetscMin(nmin,B_N); 3021 if (B_neigs < nmin_s) { 3022 PetscBLASInt B_neigs2; 3023 3024 if (pcbddc->use_deluxe_scaling) { 3025 B_IL = B_N - nmin_s + 1; 3026 B_IU = B_N - B_neigs; 3027 } else { 3028 B_IL = B_neigs + 1; 3029 B_IU = nmin_s; 3030 } 3031 if (pcbddc->dbg_flag) { 3032 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 3033 } 3034 if (sub_schurs->is_hermitian) { 3035 PetscInt j,k; 3036 for (j=0;j<subset_size;j++) { 3037 for (k=j;k<subset_size;k++) { 3038 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3039 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3040 } 3041 } 3042 } else { 3043 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3044 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3045 } 3046 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3047 #if defined(PETSC_USE_COMPLEX) 3048 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3049 #else 3050 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3051 #endif 3052 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3053 B_neigs += B_neigs2; 3054 } 3055 if (B_ierr) { 3056 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3057 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3058 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3059 } 3060 if (pcbddc->dbg_flag) { 3061 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3062 for (j=0;j<B_neigs;j++) { 3063 if (eigs[j] == 0.0) { 3064 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3065 } else { 3066 if (pcbddc->use_deluxe_scaling) { 3067 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3068 } else { 3069 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3070 } 3071 } 3072 } 3073 } 3074 } else { 3075 /* TODO */ 3076 } 3077 } 3078 /* change the basis back to the original one */ 3079 if (sub_schurs->change) { 3080 Mat change,phi,phit; 3081 3082 if (pcbddc->dbg_flag > 1) { 3083 PetscInt ii; 3084 for (ii=0;ii<B_neigs;ii++) { 3085 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3086 for (j=0;j<B_N;j++) { 3087 #if defined(PETSC_USE_COMPLEX) 3088 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3089 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3090 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3091 #else 3092 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3093 #endif 3094 } 3095 } 3096 } 3097 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3098 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3099 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3100 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3101 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3102 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3103 } 3104 maxneigs = PetscMax(B_neigs,maxneigs); 3105 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3106 if (B_neigs) { 3107 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3108 3109 if (pcbddc->dbg_flag > 1) { 3110 PetscInt ii; 3111 for (ii=0;ii<B_neigs;ii++) { 3112 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3113 for (j=0;j<B_N;j++) { 3114 #if defined(PETSC_USE_COMPLEX) 3115 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3116 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3117 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3118 #else 3119 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3120 #endif 3121 } 3122 } 3123 } 3124 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3125 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3126 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3127 cum++; 3128 } 3129 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3130 /* shift for next computation */ 3131 cumarray += subset_size*subset_size; 3132 } 3133 if (pcbddc->dbg_flag) { 3134 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3135 } 3136 3137 if (mss) { 3138 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3139 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3140 /* destroy matrices (junk) */ 3141 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3142 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3143 } 3144 if (allocated_S_St) { 3145 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3146 } 3147 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3148 #if defined(PETSC_USE_COMPLEX) 3149 ierr = PetscFree(rwork);CHKERRQ(ierr); 3150 #endif 3151 if (pcbddc->dbg_flag) { 3152 PetscInt maxneigs_r; 3153 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3154 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3155 } 3156 PetscFunctionReturn(0); 3157 } 3158 3159 #undef __FUNCT__ 3160 #define __FUNCT__ "PCBDDCSetUpSolvers" 3161 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3162 { 3163 PetscScalar *coarse_submat_vals; 3164 PetscErrorCode ierr; 3165 3166 PetscFunctionBegin; 3167 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3168 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3169 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3170 3171 /* Setup local neumann solver ksp_R */ 3172 /* PCBDDCSetUpLocalScatters should be called first! */ 3173 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3174 3175 /* 3176 Setup local correction and local part of coarse basis. 3177 Gives back the dense local part of the coarse matrix in column major ordering 3178 */ 3179 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3180 3181 /* Compute total number of coarse nodes and setup coarse solver */ 3182 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3183 3184 /* free */ 3185 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3186 PetscFunctionReturn(0); 3187 } 3188 3189 #undef __FUNCT__ 3190 #define __FUNCT__ "PCBDDCResetCustomization" 3191 PetscErrorCode PCBDDCResetCustomization(PC pc) 3192 { 3193 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3194 PetscErrorCode ierr; 3195 3196 PetscFunctionBegin; 3197 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3198 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3199 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3200 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3201 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3202 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3203 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3204 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3205 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3206 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3207 PetscFunctionReturn(0); 3208 } 3209 3210 #undef __FUNCT__ 3211 #define __FUNCT__ "PCBDDCResetTopography" 3212 PetscErrorCode PCBDDCResetTopography(PC pc) 3213 { 3214 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3215 PetscInt i; 3216 PetscErrorCode ierr; 3217 3218 PetscFunctionBegin; 3219 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3220 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3221 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3222 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3223 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3224 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3225 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3226 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3227 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3228 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3229 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3230 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3231 for (i=0;i<pcbddc->n_local_subs;i++) { 3232 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3233 } 3234 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3235 if (pcbddc->sub_schurs) { 3236 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 3237 } 3238 pcbddc->graphanalyzed = PETSC_FALSE; 3239 pcbddc->recompute_topography = PETSC_TRUE; 3240 PetscFunctionReturn(0); 3241 } 3242 3243 #undef __FUNCT__ 3244 #define __FUNCT__ "PCBDDCResetSolvers" 3245 PetscErrorCode PCBDDCResetSolvers(PC pc) 3246 { 3247 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3248 PetscErrorCode ierr; 3249 3250 PetscFunctionBegin; 3251 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3252 if (pcbddc->coarse_phi_B) { 3253 PetscScalar *array; 3254 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3255 ierr = PetscFree(array);CHKERRQ(ierr); 3256 } 3257 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3258 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3259 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3260 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3261 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3262 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3263 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3264 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3265 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3266 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3267 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3268 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3269 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3270 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3271 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 3272 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 3273 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 3274 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3275 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3276 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3277 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3278 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3279 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3280 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3281 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3282 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3283 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3284 if (pcbddc->benign_zerodiag_subs) { 3285 PetscInt i; 3286 for (i=0;i<pcbddc->benign_n;i++) { 3287 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3288 } 3289 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3290 } 3291 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3292 PetscFunctionReturn(0); 3293 } 3294 3295 #undef __FUNCT__ 3296 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 3297 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3298 { 3299 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3300 PC_IS *pcis = (PC_IS*)pc->data; 3301 VecType impVecType; 3302 PetscInt n_constraints,n_R,old_size; 3303 PetscErrorCode ierr; 3304 3305 PetscFunctionBegin; 3306 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3307 n_R = pcis->n - pcbddc->n_vertices; 3308 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3309 /* local work vectors (try to avoid unneeded work)*/ 3310 /* R nodes */ 3311 old_size = -1; 3312 if (pcbddc->vec1_R) { 3313 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3314 } 3315 if (n_R != old_size) { 3316 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3317 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3318 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3319 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3320 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3321 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3322 } 3323 /* local primal dofs */ 3324 old_size = -1; 3325 if (pcbddc->vec1_P) { 3326 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3327 } 3328 if (pcbddc->local_primal_size != old_size) { 3329 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3330 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3331 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3332 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3333 } 3334 /* local explicit constraints */ 3335 old_size = -1; 3336 if (pcbddc->vec1_C) { 3337 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3338 } 3339 if (n_constraints && n_constraints != old_size) { 3340 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3341 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3342 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3343 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3344 } 3345 PetscFunctionReturn(0); 3346 } 3347 3348 #undef __FUNCT__ 3349 #define __FUNCT__ "PCBDDCSetUpCorrection" 3350 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3351 { 3352 PetscErrorCode ierr; 3353 /* pointers to pcis and pcbddc */ 3354 PC_IS* pcis = (PC_IS*)pc->data; 3355 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3356 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3357 /* submatrices of local problem */ 3358 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3359 /* submatrices of local coarse problem */ 3360 Mat S_VV,S_CV,S_VC,S_CC; 3361 /* working matrices */ 3362 Mat C_CR; 3363 /* additional working stuff */ 3364 PC pc_R; 3365 Mat F; 3366 Vec dummy_vec; 3367 PetscBool isLU,isCHOL,isILU,need_benign_correction; 3368 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3369 PetscScalar *work; 3370 PetscInt *idx_V_B; 3371 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3372 PetscInt i,n_R,n_D,n_B; 3373 3374 /* some shortcuts to scalars */ 3375 PetscScalar one=1.0,m_one=-1.0; 3376 3377 PetscFunctionBegin; 3378 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3379 3380 /* Set Non-overlapping dimensions */ 3381 n_vertices = pcbddc->n_vertices; 3382 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3383 n_B = pcis->n_B; 3384 n_D = pcis->n - n_B; 3385 n_R = pcis->n - n_vertices; 3386 3387 /* vertices in boundary numbering */ 3388 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3389 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3390 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3391 3392 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3393 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3394 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3395 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3396 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3397 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3398 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3399 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3400 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3401 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3402 3403 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3404 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3405 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3406 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3407 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3408 lda_rhs = n_R; 3409 need_benign_correction = PETSC_FALSE; 3410 if (isLU || isILU || isCHOL) { 3411 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3412 } else if (sub_schurs && sub_schurs->reuse_solver) { 3413 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3414 MatFactorType type; 3415 3416 F = reuse_solver->F; 3417 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3418 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3419 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3420 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3421 } else { 3422 F = NULL; 3423 } 3424 3425 /* allocate workspace */ 3426 n = 0; 3427 if (n_constraints) { 3428 n += lda_rhs*n_constraints; 3429 } 3430 if (n_vertices) { 3431 n = PetscMax(2*lda_rhs*n_vertices,n); 3432 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3433 } 3434 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3435 3436 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3437 dummy_vec = NULL; 3438 if (need_benign_correction && lda_rhs != n_R && F) { 3439 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3440 } 3441 3442 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3443 if (n_constraints) { 3444 Mat M1,M2,M3,C_B; 3445 IS is_aux; 3446 PetscScalar *array,*array2; 3447 3448 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3449 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3450 3451 /* Extract constraints on R nodes: C_{CR} */ 3452 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3453 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3454 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3455 3456 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3457 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3458 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3459 for (i=0;i<n_constraints;i++) { 3460 const PetscScalar *row_cmat_values; 3461 const PetscInt *row_cmat_indices; 3462 PetscInt size_of_constraint,j; 3463 3464 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3465 for (j=0;j<size_of_constraint;j++) { 3466 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3467 } 3468 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3469 } 3470 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3471 if (F) { 3472 Mat B; 3473 3474 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3475 if (need_benign_correction) { 3476 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3477 3478 /* rhs is already zero on interior dofs, no need to change the rhs */ 3479 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3480 } 3481 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 3482 if (need_benign_correction) { 3483 PetscScalar *marr; 3484 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3485 3486 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3487 if (lda_rhs != n_R) { 3488 for (i=0;i<n_constraints;i++) { 3489 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3490 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3491 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3492 } 3493 } else { 3494 for (i=0;i<n_constraints;i++) { 3495 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3496 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3497 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3498 } 3499 } 3500 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3501 } 3502 ierr = MatDestroy(&B);CHKERRQ(ierr); 3503 } else { 3504 PetscScalar *marr; 3505 3506 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3507 for (i=0;i<n_constraints;i++) { 3508 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3509 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3510 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3511 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3512 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3513 } 3514 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3515 } 3516 if (!pcbddc->switch_static) { 3517 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3518 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3519 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3520 for (i=0;i<n_constraints;i++) { 3521 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3522 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3523 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3524 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3525 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3526 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3527 } 3528 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3529 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3530 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3531 } else { 3532 if (lda_rhs != n_R) { 3533 IS dummy; 3534 3535 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3536 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3537 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3538 } else { 3539 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3540 pcbddc->local_auxmat2 = local_auxmat2_R; 3541 } 3542 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3543 } 3544 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3545 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3546 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3547 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3548 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3549 if (isCHOL) { 3550 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3551 } else { 3552 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3553 } 3554 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3555 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3556 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3557 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3558 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3559 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3560 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3561 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3562 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3563 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3564 } 3565 3566 /* Get submatrices from subdomain matrix */ 3567 if (n_vertices) { 3568 IS is_aux; 3569 3570 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3571 IS tis; 3572 3573 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3574 ierr = ISSort(tis);CHKERRQ(ierr); 3575 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3576 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3577 } else { 3578 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3579 } 3580 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3581 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3582 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3583 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3584 } 3585 3586 /* Matrix of coarse basis functions (local) */ 3587 if (pcbddc->coarse_phi_B) { 3588 PetscInt on_B,on_primal,on_D=n_D; 3589 if (pcbddc->coarse_phi_D) { 3590 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3591 } 3592 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3593 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3594 PetscScalar *marray; 3595 3596 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3597 ierr = PetscFree(marray);CHKERRQ(ierr); 3598 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3599 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3600 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3601 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3602 } 3603 } 3604 3605 if (!pcbddc->coarse_phi_B) { 3606 PetscScalar *marray; 3607 3608 n = n_B*pcbddc->local_primal_size; 3609 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3610 n += n_D*pcbddc->local_primal_size; 3611 } 3612 if (!pcbddc->symmetric_primal) { 3613 n *= 2; 3614 } 3615 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 3616 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3617 n = n_B*pcbddc->local_primal_size; 3618 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3619 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3620 n += n_D*pcbddc->local_primal_size; 3621 } 3622 if (!pcbddc->symmetric_primal) { 3623 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3624 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3625 n = n_B*pcbddc->local_primal_size; 3626 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3627 } 3628 } else { 3629 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3630 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3631 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3632 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3633 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3634 } 3635 } 3636 } 3637 3638 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3639 p0_lidx_I = NULL; 3640 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3641 const PetscInt *idxs; 3642 3643 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3644 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3645 for (i=0;i<pcbddc->benign_n;i++) { 3646 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3647 } 3648 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3649 } 3650 3651 /* vertices */ 3652 if (n_vertices) { 3653 3654 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3655 3656 if (n_R) { 3657 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3658 PetscBLASInt B_N,B_one = 1; 3659 PetscScalar *x,*y; 3660 PetscBool isseqaij; 3661 3662 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3663 if (need_benign_correction) { 3664 ISLocalToGlobalMapping RtoN; 3665 IS is_p0; 3666 PetscInt *idxs_p0,n; 3667 3668 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3669 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3670 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3671 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3672 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3673 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3674 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3675 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3676 } 3677 3678 if (lda_rhs == n_R) { 3679 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3680 } else { 3681 PetscScalar *av,*array; 3682 const PetscInt *xadj,*adjncy; 3683 PetscInt n; 3684 PetscBool flg_row; 3685 3686 array = work+lda_rhs*n_vertices; 3687 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3688 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3689 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3690 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3691 for (i=0;i<n;i++) { 3692 PetscInt j; 3693 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3694 } 3695 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3696 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3697 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3698 } 3699 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3700 if (need_benign_correction) { 3701 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3702 PetscScalar *marr; 3703 3704 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3705 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3706 3707 | 0 0 0 | (V) 3708 L = | 0 0 -1 | (P-p0) 3709 | 0 0 -1 | (p0) 3710 3711 */ 3712 for (i=0;i<reuse_solver->benign_n;i++) { 3713 const PetscScalar *vals; 3714 const PetscInt *idxs,*idxs_zero; 3715 PetscInt n,j,nz; 3716 3717 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3718 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3719 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3720 for (j=0;j<n;j++) { 3721 PetscScalar val = vals[j]; 3722 PetscInt k,col = idxs[j]; 3723 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3724 } 3725 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3726 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3727 } 3728 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3729 } 3730 if (F) { 3731 /* need to correct the rhs */ 3732 if (need_benign_correction) { 3733 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3734 PetscScalar *marr; 3735 3736 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3737 if (lda_rhs != n_R) { 3738 for (i=0;i<n_vertices;i++) { 3739 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3740 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3741 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3742 } 3743 } else { 3744 for (i=0;i<n_vertices;i++) { 3745 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3746 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3747 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3748 } 3749 } 3750 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3751 } 3752 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3753 /* need to correct the solution */ 3754 if (need_benign_correction) { 3755 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3756 PetscScalar *marr; 3757 3758 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3759 if (lda_rhs != n_R) { 3760 for (i=0;i<n_vertices;i++) { 3761 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3762 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3763 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3764 } 3765 } else { 3766 for (i=0;i<n_vertices;i++) { 3767 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3768 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3769 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3770 } 3771 } 3772 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3773 } 3774 } else { 3775 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3776 for (i=0;i<n_vertices;i++) { 3777 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3778 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3779 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3780 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3781 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3782 } 3783 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3784 } 3785 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3786 /* S_VV and S_CV */ 3787 if (n_constraints) { 3788 Mat B; 3789 3790 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3791 for (i=0;i<n_vertices;i++) { 3792 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3793 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3794 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3795 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3796 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3797 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3798 } 3799 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3800 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3801 ierr = MatDestroy(&B);CHKERRQ(ierr); 3802 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3803 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3804 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3805 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3806 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3807 ierr = MatDestroy(&B);CHKERRQ(ierr); 3808 } 3809 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3810 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3811 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3812 } 3813 if (lda_rhs != n_R) { 3814 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3815 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3816 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3817 } 3818 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3819 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3820 if (need_benign_correction) { 3821 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3822 PetscScalar *marr,*sums; 3823 3824 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3825 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 3826 for (i=0;i<reuse_solver->benign_n;i++) { 3827 const PetscScalar *vals; 3828 const PetscInt *idxs,*idxs_zero; 3829 PetscInt n,j,nz; 3830 3831 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3832 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3833 for (j=0;j<n_vertices;j++) { 3834 PetscInt k; 3835 sums[j] = 0.; 3836 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3837 } 3838 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3839 for (j=0;j<n;j++) { 3840 PetscScalar val = vals[j]; 3841 PetscInt k; 3842 for (k=0;k<n_vertices;k++) { 3843 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3844 } 3845 } 3846 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3847 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3848 } 3849 ierr = PetscFree(sums);CHKERRQ(ierr); 3850 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 3851 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3852 } 3853 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3854 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3855 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3856 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3857 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3858 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3859 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3860 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3861 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3862 } else { 3863 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3864 } 3865 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3866 3867 /* coarse basis functions */ 3868 for (i=0;i<n_vertices;i++) { 3869 PetscScalar *y; 3870 3871 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3872 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3873 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3874 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3875 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3876 y[n_B*i+idx_V_B[i]] = 1.0; 3877 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3878 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3879 3880 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3881 PetscInt j; 3882 3883 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3884 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3885 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3886 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3887 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3888 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3889 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3890 } 3891 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3892 } 3893 /* if n_R == 0 the object is not destroyed */ 3894 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3895 } 3896 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3897 3898 if (n_constraints) { 3899 Mat B; 3900 3901 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3902 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3903 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3904 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3905 if (n_vertices) { 3906 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3907 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3908 } else { 3909 Mat S_VCt; 3910 3911 if (lda_rhs != n_R) { 3912 ierr = MatDestroy(&B);CHKERRQ(ierr); 3913 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3914 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3915 } 3916 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3917 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3918 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3919 } 3920 } 3921 ierr = MatDestroy(&B);CHKERRQ(ierr); 3922 /* coarse basis functions */ 3923 for (i=0;i<n_constraints;i++) { 3924 PetscScalar *y; 3925 3926 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3927 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3928 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3929 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3930 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3931 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3932 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3933 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3934 PetscInt j; 3935 3936 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3937 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3938 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3939 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3940 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3941 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3942 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3943 } 3944 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3945 } 3946 } 3947 if (n_constraints) { 3948 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3949 } 3950 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3951 3952 /* coarse matrix entries relative to B_0 */ 3953 if (pcbddc->benign_n) { 3954 Mat B0_B,B0_BPHI; 3955 IS is_dummy; 3956 PetscScalar *data; 3957 PetscInt j; 3958 3959 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3960 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3961 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3962 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3963 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3964 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3965 for (j=0;j<pcbddc->benign_n;j++) { 3966 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3967 for (i=0;i<pcbddc->local_primal_size;i++) { 3968 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3969 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3970 } 3971 } 3972 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3973 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3974 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3975 } 3976 3977 /* compute other basis functions for non-symmetric problems */ 3978 if (!pcbddc->symmetric_primal) { 3979 Mat B_V=NULL,B_C=NULL; 3980 PetscScalar *marray; 3981 3982 if (n_constraints) { 3983 Mat S_CCT,C_CRT; 3984 3985 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 3986 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 3987 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 3988 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 3989 if (n_vertices) { 3990 Mat S_VCT; 3991 3992 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 3993 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 3994 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 3995 } 3996 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 3997 } else { 3998 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 3999 } 4000 if (n_vertices && n_R) { 4001 PetscScalar *av,*marray; 4002 const PetscInt *xadj,*adjncy; 4003 PetscInt n; 4004 PetscBool flg_row; 4005 4006 /* B_V = B_V - A_VR^T */ 4007 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4008 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4009 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4010 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4011 for (i=0;i<n;i++) { 4012 PetscInt j; 4013 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4014 } 4015 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4016 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4017 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4018 } 4019 4020 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4021 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4022 for (i=0;i<n_vertices;i++) { 4023 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4024 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4025 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4026 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4027 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4028 } 4029 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4030 if (B_C) { 4031 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4032 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4033 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4034 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4035 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4036 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4037 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4038 } 4039 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4040 } 4041 /* coarse basis functions */ 4042 for (i=0;i<pcbddc->local_primal_size;i++) { 4043 PetscScalar *y; 4044 4045 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4046 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4047 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4048 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4049 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4050 if (i<n_vertices) { 4051 y[n_B*i+idx_V_B[i]] = 1.0; 4052 } 4053 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4054 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4055 4056 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4057 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4058 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4059 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4060 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4061 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4062 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4063 } 4064 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4065 } 4066 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4067 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4068 } 4069 /* free memory */ 4070 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4071 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4072 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4073 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4074 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4075 ierr = PetscFree(work);CHKERRQ(ierr); 4076 if (n_vertices) { 4077 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4078 } 4079 if (n_constraints) { 4080 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4081 } 4082 /* Checking coarse_sub_mat and coarse basis functios */ 4083 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4084 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4085 if (pcbddc->dbg_flag) { 4086 Mat coarse_sub_mat; 4087 Mat AUXMAT,TM1,TM2,TM3,TM4; 4088 Mat coarse_phi_D,coarse_phi_B; 4089 Mat coarse_psi_D,coarse_psi_B; 4090 Mat A_II,A_BB,A_IB,A_BI; 4091 Mat C_B,CPHI; 4092 IS is_dummy; 4093 Vec mones; 4094 MatType checkmattype=MATSEQAIJ; 4095 PetscReal real_value; 4096 4097 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4098 Mat A; 4099 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4100 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4101 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4102 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4103 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4104 ierr = MatDestroy(&A);CHKERRQ(ierr); 4105 } else { 4106 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4107 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4108 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4109 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4110 } 4111 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4112 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4113 if (!pcbddc->symmetric_primal) { 4114 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4115 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4116 } 4117 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4118 4119 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4120 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4121 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4122 if (!pcbddc->symmetric_primal) { 4123 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4124 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4125 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4126 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4127 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4128 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4129 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4130 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4131 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4132 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4133 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4134 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4135 } else { 4136 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4137 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4138 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4139 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4140 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4141 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4142 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4143 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4144 } 4145 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4146 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4147 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4148 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4149 if (pcbddc->benign_n) { 4150 Mat B0_B,B0_BPHI; 4151 PetscScalar *data,*data2; 4152 PetscInt j; 4153 4154 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4155 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4156 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4157 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4158 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4159 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4160 for (j=0;j<pcbddc->benign_n;j++) { 4161 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4162 for (i=0;i<pcbddc->local_primal_size;i++) { 4163 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4164 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4165 } 4166 } 4167 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4168 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4169 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4170 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4171 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4172 } 4173 #if 0 4174 { 4175 PetscViewer viewer; 4176 char filename[256]; 4177 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4178 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4179 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4180 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4181 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4182 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4183 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4184 if (save_change) { 4185 Mat phi_B; 4186 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 4187 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 4188 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 4189 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 4190 } else { 4191 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4192 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4193 } 4194 if (pcbddc->coarse_phi_D) { 4195 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4196 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4197 } 4198 if (pcbddc->coarse_psi_B) { 4199 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4200 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4201 } 4202 if (pcbddc->coarse_psi_D) { 4203 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4204 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4205 } 4206 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4207 } 4208 #endif 4209 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4210 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4211 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4212 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4213 4214 /* check constraints */ 4215 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4216 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4217 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4218 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4219 } else { 4220 PetscScalar *data; 4221 Mat tmat; 4222 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4223 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4224 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4225 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4226 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4227 } 4228 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4229 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4230 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4231 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4232 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4233 if (!pcbddc->symmetric_primal) { 4234 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4235 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4236 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4237 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4238 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4239 } 4240 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4241 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4242 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4243 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4244 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4245 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4246 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4247 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4248 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4249 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4250 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4251 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4252 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4253 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4254 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4255 if (!pcbddc->symmetric_primal) { 4256 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4257 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4258 } 4259 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4260 } 4261 /* get back data */ 4262 *coarse_submat_vals_n = coarse_submat_vals; 4263 PetscFunctionReturn(0); 4264 } 4265 4266 #undef __FUNCT__ 4267 #define __FUNCT__ "MatGetSubMatrixUnsorted" 4268 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4269 { 4270 Mat *work_mat; 4271 IS isrow_s,iscol_s; 4272 PetscBool rsorted,csorted; 4273 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4274 PetscErrorCode ierr; 4275 4276 PetscFunctionBegin; 4277 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4278 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4279 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4280 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4281 4282 if (!rsorted) { 4283 const PetscInt *idxs; 4284 PetscInt *idxs_sorted,i; 4285 4286 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4287 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4288 for (i=0;i<rsize;i++) { 4289 idxs_perm_r[i] = i; 4290 } 4291 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4292 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4293 for (i=0;i<rsize;i++) { 4294 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4295 } 4296 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4297 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4298 } else { 4299 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4300 isrow_s = isrow; 4301 } 4302 4303 if (!csorted) { 4304 if (isrow == iscol) { 4305 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4306 iscol_s = isrow_s; 4307 } else { 4308 const PetscInt *idxs; 4309 PetscInt *idxs_sorted,i; 4310 4311 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4312 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4313 for (i=0;i<csize;i++) { 4314 idxs_perm_c[i] = i; 4315 } 4316 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4317 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4318 for (i=0;i<csize;i++) { 4319 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4320 } 4321 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4322 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4323 } 4324 } else { 4325 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4326 iscol_s = iscol; 4327 } 4328 4329 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4330 4331 if (!rsorted || !csorted) { 4332 Mat new_mat; 4333 IS is_perm_r,is_perm_c; 4334 4335 if (!rsorted) { 4336 PetscInt *idxs_r,i; 4337 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4338 for (i=0;i<rsize;i++) { 4339 idxs_r[idxs_perm_r[i]] = i; 4340 } 4341 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4342 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4343 } else { 4344 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4345 } 4346 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4347 4348 if (!csorted) { 4349 if (isrow_s == iscol_s) { 4350 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4351 is_perm_c = is_perm_r; 4352 } else { 4353 PetscInt *idxs_c,i; 4354 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4355 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4356 for (i=0;i<csize;i++) { 4357 idxs_c[idxs_perm_c[i]] = i; 4358 } 4359 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4360 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4361 } 4362 } else { 4363 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4364 } 4365 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4366 4367 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4368 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4369 work_mat[0] = new_mat; 4370 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4371 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4372 } 4373 4374 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4375 *B = work_mat[0]; 4376 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4377 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4378 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4379 PetscFunctionReturn(0); 4380 } 4381 4382 #undef __FUNCT__ 4383 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 4384 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4385 { 4386 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4387 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4388 Mat new_mat; 4389 IS is_local,is_global; 4390 PetscInt local_size; 4391 PetscBool isseqaij; 4392 PetscErrorCode ierr; 4393 4394 PetscFunctionBegin; 4395 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4396 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4397 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4398 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4399 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4400 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4401 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4402 4403 /* check */ 4404 if (pcbddc->dbg_flag) { 4405 Vec x,x_change; 4406 PetscReal error; 4407 4408 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4409 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4410 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4411 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4412 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4413 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4414 if (!pcbddc->change_interior) { 4415 const PetscScalar *x,*y,*v; 4416 PetscReal lerror = 0.; 4417 PetscInt i; 4418 4419 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4420 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4421 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4422 for (i=0;i<local_size;i++) 4423 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4424 lerror = PetscAbsScalar(x[i]-y[i]); 4425 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4426 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4427 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4428 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4429 if (error > PETSC_SMALL) { 4430 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4431 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4432 } else { 4433 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4434 } 4435 } 4436 } 4437 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4438 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4439 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4440 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4441 if (error > PETSC_SMALL) { 4442 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4443 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4444 } else { 4445 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4446 } 4447 } 4448 ierr = VecDestroy(&x);CHKERRQ(ierr); 4449 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4450 } 4451 4452 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4453 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4454 if (isseqaij) { 4455 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4456 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4457 } else { 4458 Mat work_mat; 4459 4460 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4461 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4462 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4463 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4464 } 4465 if (matis->A->symmetric_set) { 4466 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4467 #if !defined(PETSC_USE_COMPLEX) 4468 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4469 #endif 4470 } 4471 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4472 PetscFunctionReturn(0); 4473 } 4474 4475 #undef __FUNCT__ 4476 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 4477 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4478 { 4479 PC_IS* pcis = (PC_IS*)(pc->data); 4480 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4481 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4482 PetscInt *idx_R_local=NULL; 4483 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4484 PetscInt vbs,bs; 4485 PetscBT bitmask=NULL; 4486 PetscErrorCode ierr; 4487 4488 PetscFunctionBegin; 4489 /* 4490 No need to setup local scatters if 4491 - primal space is unchanged 4492 AND 4493 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4494 AND 4495 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4496 */ 4497 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4498 PetscFunctionReturn(0); 4499 } 4500 /* destroy old objects */ 4501 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4502 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4503 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4504 /* Set Non-overlapping dimensions */ 4505 n_B = pcis->n_B; 4506 n_D = pcis->n - n_B; 4507 n_vertices = pcbddc->n_vertices; 4508 4509 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4510 4511 /* create auxiliary bitmask and allocate workspace */ 4512 if (!sub_schurs || !sub_schurs->reuse_solver) { 4513 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4514 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4515 for (i=0;i<n_vertices;i++) { 4516 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4517 } 4518 4519 for (i=0, n_R=0; i<pcis->n; i++) { 4520 if (!PetscBTLookup(bitmask,i)) { 4521 idx_R_local[n_R++] = i; 4522 } 4523 } 4524 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4525 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4526 4527 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4528 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4529 } 4530 4531 /* Block code */ 4532 vbs = 1; 4533 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4534 if (bs>1 && !(n_vertices%bs)) { 4535 PetscBool is_blocked = PETSC_TRUE; 4536 PetscInt *vary; 4537 if (!sub_schurs || !sub_schurs->reuse_solver) { 4538 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4539 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4540 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4541 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4542 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4543 for (i=0; i<pcis->n/bs; i++) { 4544 if (vary[i]!=0 && vary[i]!=bs) { 4545 is_blocked = PETSC_FALSE; 4546 break; 4547 } 4548 } 4549 ierr = PetscFree(vary);CHKERRQ(ierr); 4550 } else { 4551 /* Verify directly the R set */ 4552 for (i=0; i<n_R/bs; i++) { 4553 PetscInt j,node=idx_R_local[bs*i]; 4554 for (j=1; j<bs; j++) { 4555 if (node != idx_R_local[bs*i+j]-j) { 4556 is_blocked = PETSC_FALSE; 4557 break; 4558 } 4559 } 4560 } 4561 } 4562 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4563 vbs = bs; 4564 for (i=0;i<n_R/vbs;i++) { 4565 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4566 } 4567 } 4568 } 4569 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4570 if (sub_schurs && sub_schurs->reuse_solver) { 4571 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4572 4573 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4574 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4575 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4576 reuse_solver->is_R = pcbddc->is_R_local; 4577 } else { 4578 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4579 } 4580 4581 /* print some info if requested */ 4582 if (pcbddc->dbg_flag) { 4583 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4584 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4585 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4586 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4587 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4588 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4589 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4590 } 4591 4592 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4593 if (!sub_schurs || !sub_schurs->reuse_solver) { 4594 IS is_aux1,is_aux2; 4595 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4596 4597 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4598 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4599 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4600 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4601 for (i=0; i<n_D; i++) { 4602 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4603 } 4604 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4605 for (i=0, j=0; i<n_R; i++) { 4606 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4607 aux_array1[j++] = i; 4608 } 4609 } 4610 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4611 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4612 for (i=0, j=0; i<n_B; i++) { 4613 if (!PetscBTLookup(bitmask,is_indices[i])) { 4614 aux_array2[j++] = i; 4615 } 4616 } 4617 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4618 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4619 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4620 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4621 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4622 4623 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4624 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4625 for (i=0, j=0; i<n_R; i++) { 4626 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4627 aux_array1[j++] = i; 4628 } 4629 } 4630 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4631 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4632 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4633 } 4634 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4635 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4636 } else { 4637 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4638 IS tis; 4639 PetscInt schur_size; 4640 4641 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4642 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4643 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4644 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4645 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4646 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4647 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4648 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4649 } 4650 } 4651 PetscFunctionReturn(0); 4652 } 4653 4654 4655 #undef __FUNCT__ 4656 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 4657 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4658 { 4659 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4660 PC_IS *pcis = (PC_IS*)pc->data; 4661 PC pc_temp; 4662 Mat A_RR; 4663 MatReuse reuse; 4664 PetscScalar m_one = -1.0; 4665 PetscReal value; 4666 PetscInt n_D,n_R; 4667 PetscBool check_corr[2],issbaij; 4668 PetscErrorCode ierr; 4669 /* prefixes stuff */ 4670 char dir_prefix[256],neu_prefix[256],str_level[16]; 4671 size_t len; 4672 4673 PetscFunctionBegin; 4674 4675 /* compute prefixes */ 4676 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4677 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4678 if (!pcbddc->current_level) { 4679 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4680 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4681 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4682 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4683 } else { 4684 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4685 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4686 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4687 len -= 15; /* remove "pc_bddc_coarse_" */ 4688 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4689 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4690 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4691 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4692 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4693 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4694 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4695 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4696 } 4697 4698 /* DIRICHLET PROBLEM */ 4699 if (dirichlet) { 4700 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4701 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4702 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4703 if (pcbddc->dbg_flag) { 4704 Mat A_IIn; 4705 4706 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4707 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4708 pcis->A_II = A_IIn; 4709 } 4710 } 4711 if (pcbddc->local_mat->symmetric_set) { 4712 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4713 } 4714 /* Matrix for Dirichlet problem is pcis->A_II */ 4715 n_D = pcis->n - pcis->n_B; 4716 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4717 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4718 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4719 /* default */ 4720 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4721 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4722 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4723 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4724 if (issbaij) { 4725 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4726 } else { 4727 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4728 } 4729 /* Allow user's customization */ 4730 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4731 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4732 } 4733 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4734 if (sub_schurs && sub_schurs->reuse_solver) { 4735 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4736 4737 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4738 } 4739 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4740 if (!n_D) { 4741 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4742 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4743 } 4744 /* Set Up KSP for Dirichlet problem of BDDC */ 4745 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4746 /* set ksp_D into pcis data */ 4747 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4748 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4749 pcis->ksp_D = pcbddc->ksp_D; 4750 } 4751 4752 /* NEUMANN PROBLEM */ 4753 A_RR = 0; 4754 if (neumann) { 4755 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4756 PetscInt ibs,mbs; 4757 PetscBool issbaij; 4758 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4759 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4760 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4761 if (pcbddc->ksp_R) { /* already created ksp */ 4762 PetscInt nn_R; 4763 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4764 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4765 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4766 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4767 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4768 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4769 reuse = MAT_INITIAL_MATRIX; 4770 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4771 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4772 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4773 reuse = MAT_INITIAL_MATRIX; 4774 } else { /* safe to reuse the matrix */ 4775 reuse = MAT_REUSE_MATRIX; 4776 } 4777 } 4778 /* last check */ 4779 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4780 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4781 reuse = MAT_INITIAL_MATRIX; 4782 } 4783 } else { /* first time, so we need to create the matrix */ 4784 reuse = MAT_INITIAL_MATRIX; 4785 } 4786 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4787 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4788 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4789 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4790 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4791 if (matis->A == pcbddc->local_mat) { 4792 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4793 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4794 } else { 4795 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4796 } 4797 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4798 if (matis->A == pcbddc->local_mat) { 4799 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4800 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4801 } else { 4802 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4803 } 4804 } 4805 /* extract A_RR */ 4806 if (sub_schurs && sub_schurs->reuse_solver) { 4807 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4808 4809 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4810 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4811 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4812 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4813 } else { 4814 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4815 } 4816 } else { 4817 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4818 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4819 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4820 } 4821 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4822 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4823 } 4824 if (pcbddc->local_mat->symmetric_set) { 4825 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4826 } 4827 if (!pcbddc->ksp_R) { /* create object if not present */ 4828 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4829 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4830 /* default */ 4831 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4832 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4833 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4834 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4835 if (issbaij) { 4836 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4837 } else { 4838 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4839 } 4840 /* Allow user's customization */ 4841 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4842 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4843 } 4844 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4845 if (!n_R) { 4846 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4847 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4848 } 4849 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4850 /* Reuse solver if it is present */ 4851 if (sub_schurs && sub_schurs->reuse_solver) { 4852 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4853 4854 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4855 } 4856 /* Set Up KSP for Neumann problem of BDDC */ 4857 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4858 } 4859 4860 if (pcbddc->dbg_flag) { 4861 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4862 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4863 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4864 } 4865 4866 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4867 check_corr[0] = check_corr[1] = PETSC_FALSE; 4868 if (pcbddc->NullSpace_corr[0]) { 4869 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4870 } 4871 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4872 check_corr[0] = PETSC_TRUE; 4873 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4874 } 4875 if (neumann && pcbddc->NullSpace_corr[2]) { 4876 check_corr[1] = PETSC_TRUE; 4877 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4878 } 4879 4880 /* check Dirichlet and Neumann solvers */ 4881 if (pcbddc->dbg_flag) { 4882 if (dirichlet) { /* Dirichlet */ 4883 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4884 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4885 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4886 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4887 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4888 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4889 if (check_corr[0]) { 4890 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4891 } 4892 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4893 } 4894 if (neumann) { /* Neumann */ 4895 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4896 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4897 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4898 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4899 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4900 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4901 if (check_corr[1]) { 4902 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4903 } 4904 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4905 } 4906 } 4907 /* free Neumann problem's matrix */ 4908 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4909 PetscFunctionReturn(0); 4910 } 4911 4912 #undef __FUNCT__ 4913 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 4914 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4915 { 4916 PetscErrorCode ierr; 4917 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4918 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4919 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4920 4921 PetscFunctionBegin; 4922 if (!reuse_solver) { 4923 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4924 } 4925 if (!pcbddc->switch_static) { 4926 if (applytranspose && pcbddc->local_auxmat1) { 4927 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4928 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4929 } 4930 if (!reuse_solver) { 4931 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4932 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4933 } else { 4934 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4935 4936 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4937 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4938 } 4939 } else { 4940 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4941 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4942 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4943 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4944 if (applytranspose && pcbddc->local_auxmat1) { 4945 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4946 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4947 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4948 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4949 } 4950 } 4951 if (!reuse_solver || pcbddc->switch_static) { 4952 if (applytranspose) { 4953 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4954 } else { 4955 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4956 } 4957 } else { 4958 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4959 4960 if (applytranspose) { 4961 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4962 } else { 4963 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4964 } 4965 } 4966 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4967 if (!pcbddc->switch_static) { 4968 if (!reuse_solver) { 4969 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4970 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4971 } else { 4972 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4973 4974 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4975 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4976 } 4977 if (!applytranspose && pcbddc->local_auxmat1) { 4978 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4979 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4980 } 4981 } else { 4982 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4983 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4984 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4985 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4986 if (!applytranspose && pcbddc->local_auxmat1) { 4987 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4988 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4989 } 4990 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4991 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4992 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4993 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4994 } 4995 PetscFunctionReturn(0); 4996 } 4997 4998 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 4999 #undef __FUNCT__ 5000 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 5001 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5002 { 5003 PetscErrorCode ierr; 5004 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5005 PC_IS* pcis = (PC_IS*) (pc->data); 5006 const PetscScalar zero = 0.0; 5007 5008 PetscFunctionBegin; 5009 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5010 if (!pcbddc->benign_apply_coarse_only) { 5011 if (applytranspose) { 5012 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5013 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5014 } else { 5015 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5016 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5017 } 5018 } else { 5019 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5020 } 5021 5022 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5023 if (pcbddc->benign_n) { 5024 PetscScalar *array; 5025 PetscInt j; 5026 5027 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5028 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5029 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5030 } 5031 5032 /* start communications from local primal nodes to rhs of coarse solver */ 5033 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5034 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5035 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5036 5037 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5038 if (pcbddc->coarse_ksp) { 5039 Mat coarse_mat; 5040 Vec rhs,sol; 5041 MatNullSpace nullsp; 5042 PetscBool isbddc = PETSC_FALSE; 5043 5044 if (pcbddc->benign_have_null) { 5045 PC coarse_pc; 5046 5047 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5048 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5049 /* we need to propagate to coarser levels the need for a possible benign correction */ 5050 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5051 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5052 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5053 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5054 } 5055 } 5056 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5057 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5058 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5059 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5060 if (nullsp) { 5061 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5062 } 5063 if (applytranspose) { 5064 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5065 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5066 } else { 5067 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5068 PC coarse_pc; 5069 5070 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5071 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5072 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5073 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5074 } else { 5075 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5076 } 5077 } 5078 /* we don't need the benign correction at coarser levels anymore */ 5079 if (pcbddc->benign_have_null && isbddc) { 5080 PC coarse_pc; 5081 PC_BDDC* coarsepcbddc; 5082 5083 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5084 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5085 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5086 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5087 } 5088 if (nullsp) { 5089 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5090 } 5091 } 5092 5093 /* Local solution on R nodes */ 5094 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5095 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5096 } 5097 /* communications from coarse sol to local primal nodes */ 5098 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5099 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5100 5101 /* Sum contributions from the two levels */ 5102 if (!pcbddc->benign_apply_coarse_only) { 5103 if (applytranspose) { 5104 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5105 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5106 } else { 5107 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5108 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5109 } 5110 /* store p0 */ 5111 if (pcbddc->benign_n) { 5112 PetscScalar *array; 5113 PetscInt j; 5114 5115 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5116 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5117 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5118 } 5119 } else { /* expand the coarse solution */ 5120 if (applytranspose) { 5121 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5122 } else { 5123 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5124 } 5125 } 5126 PetscFunctionReturn(0); 5127 } 5128 5129 #undef __FUNCT__ 5130 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 5131 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5132 { 5133 PetscErrorCode ierr; 5134 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5135 PetscScalar *array; 5136 Vec from,to; 5137 5138 PetscFunctionBegin; 5139 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5140 from = pcbddc->coarse_vec; 5141 to = pcbddc->vec1_P; 5142 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5143 Vec tvec; 5144 5145 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5146 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5147 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5148 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5149 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5150 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5151 } 5152 } else { /* from local to global -> put data in coarse right hand side */ 5153 from = pcbddc->vec1_P; 5154 to = pcbddc->coarse_vec; 5155 } 5156 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5157 PetscFunctionReturn(0); 5158 } 5159 5160 #undef __FUNCT__ 5161 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 5162 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5163 { 5164 PetscErrorCode ierr; 5165 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5166 PetscScalar *array; 5167 Vec from,to; 5168 5169 PetscFunctionBegin; 5170 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5171 from = pcbddc->coarse_vec; 5172 to = pcbddc->vec1_P; 5173 } else { /* from local to global -> put data in coarse right hand side */ 5174 from = pcbddc->vec1_P; 5175 to = pcbddc->coarse_vec; 5176 } 5177 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5178 if (smode == SCATTER_FORWARD) { 5179 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5180 Vec tvec; 5181 5182 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5183 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5184 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5185 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5186 } 5187 } else { 5188 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5189 ierr = VecResetArray(from);CHKERRQ(ierr); 5190 } 5191 } 5192 PetscFunctionReturn(0); 5193 } 5194 5195 /* uncomment for testing purposes */ 5196 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5197 #undef __FUNCT__ 5198 #define __FUNCT__ "PCBDDCConstraintsSetUp" 5199 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5200 { 5201 PetscErrorCode ierr; 5202 PC_IS* pcis = (PC_IS*)(pc->data); 5203 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5204 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5205 /* one and zero */ 5206 PetscScalar one=1.0,zero=0.0; 5207 /* space to store constraints and their local indices */ 5208 PetscScalar *constraints_data; 5209 PetscInt *constraints_idxs,*constraints_idxs_B; 5210 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5211 PetscInt *constraints_n; 5212 /* iterators */ 5213 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5214 /* BLAS integers */ 5215 PetscBLASInt lwork,lierr; 5216 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5217 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5218 /* reuse */ 5219 PetscInt olocal_primal_size,olocal_primal_size_cc; 5220 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5221 /* change of basis */ 5222 PetscBool qr_needed; 5223 PetscBT change_basis,qr_needed_idx; 5224 /* auxiliary stuff */ 5225 PetscInt *nnz,*is_indices; 5226 PetscInt ncc; 5227 /* some quantities */ 5228 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5229 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5230 5231 PetscFunctionBegin; 5232 /* Destroy Mat objects computed previously */ 5233 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5234 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5235 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5236 /* save info on constraints from previous setup (if any) */ 5237 olocal_primal_size = pcbddc->local_primal_size; 5238 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5239 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5240 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5241 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5242 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5243 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5244 5245 if (!pcbddc->adaptive_selection) { 5246 IS ISForVertices,*ISForFaces,*ISForEdges; 5247 MatNullSpace nearnullsp; 5248 const Vec *nearnullvecs; 5249 Vec *localnearnullsp; 5250 PetscScalar *array; 5251 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5252 PetscBool nnsp_has_cnst; 5253 /* LAPACK working arrays for SVD or POD */ 5254 PetscBool skip_lapack,boolforchange; 5255 PetscScalar *work; 5256 PetscReal *singular_vals; 5257 #if defined(PETSC_USE_COMPLEX) 5258 PetscReal *rwork; 5259 #endif 5260 #if defined(PETSC_MISSING_LAPACK_GESVD) 5261 PetscScalar *temp_basis,*correlation_mat; 5262 #else 5263 PetscBLASInt dummy_int=1; 5264 PetscScalar dummy_scalar=1.; 5265 #endif 5266 5267 /* Get index sets for faces, edges and vertices from graph */ 5268 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5269 /* print some info */ 5270 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5271 PetscInt nv; 5272 5273 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5274 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5275 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5276 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5277 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5278 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5279 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5280 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5281 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5282 } 5283 5284 /* free unneeded index sets */ 5285 if (!pcbddc->use_vertices) { 5286 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5287 } 5288 if (!pcbddc->use_edges) { 5289 for (i=0;i<n_ISForEdges;i++) { 5290 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5291 } 5292 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5293 n_ISForEdges = 0; 5294 } 5295 if (!pcbddc->use_faces) { 5296 for (i=0;i<n_ISForFaces;i++) { 5297 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5298 } 5299 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5300 n_ISForFaces = 0; 5301 } 5302 5303 /* check if near null space is attached to global mat */ 5304 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5305 if (nearnullsp) { 5306 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5307 /* remove any stored info */ 5308 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5309 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5310 /* store information for BDDC solver reuse */ 5311 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5312 pcbddc->onearnullspace = nearnullsp; 5313 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5314 for (i=0;i<nnsp_size;i++) { 5315 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5316 } 5317 } else { /* if near null space is not provided BDDC uses constants by default */ 5318 nnsp_size = 0; 5319 nnsp_has_cnst = PETSC_TRUE; 5320 } 5321 /* get max number of constraints on a single cc */ 5322 max_constraints = nnsp_size; 5323 if (nnsp_has_cnst) max_constraints++; 5324 5325 /* 5326 Evaluate maximum storage size needed by the procedure 5327 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5328 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5329 There can be multiple constraints per connected component 5330 */ 5331 n_vertices = 0; 5332 if (ISForVertices) { 5333 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5334 } 5335 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5336 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5337 5338 total_counts = n_ISForFaces+n_ISForEdges; 5339 total_counts *= max_constraints; 5340 total_counts += n_vertices; 5341 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5342 5343 total_counts = 0; 5344 max_size_of_constraint = 0; 5345 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5346 IS used_is; 5347 if (i<n_ISForEdges) { 5348 used_is = ISForEdges[i]; 5349 } else { 5350 used_is = ISForFaces[i-n_ISForEdges]; 5351 } 5352 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5353 total_counts += j; 5354 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5355 } 5356 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5357 5358 /* get local part of global near null space vectors */ 5359 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5360 for (k=0;k<nnsp_size;k++) { 5361 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5362 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5363 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5364 } 5365 5366 /* whether or not to skip lapack calls */ 5367 skip_lapack = PETSC_TRUE; 5368 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5369 5370 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5371 if (!skip_lapack) { 5372 PetscScalar temp_work; 5373 5374 #if defined(PETSC_MISSING_LAPACK_GESVD) 5375 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5376 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5377 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5378 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5379 #if defined(PETSC_USE_COMPLEX) 5380 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5381 #endif 5382 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5383 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5384 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5385 lwork = -1; 5386 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5387 #if !defined(PETSC_USE_COMPLEX) 5388 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5389 #else 5390 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5391 #endif 5392 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5393 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5394 #else /* on missing GESVD */ 5395 /* SVD */ 5396 PetscInt max_n,min_n; 5397 max_n = max_size_of_constraint; 5398 min_n = max_constraints; 5399 if (max_size_of_constraint < max_constraints) { 5400 min_n = max_size_of_constraint; 5401 max_n = max_constraints; 5402 } 5403 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5404 #if defined(PETSC_USE_COMPLEX) 5405 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5406 #endif 5407 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5408 lwork = -1; 5409 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5410 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5411 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5412 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5413 #if !defined(PETSC_USE_COMPLEX) 5414 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5415 #else 5416 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5417 #endif 5418 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5419 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5420 #endif /* on missing GESVD */ 5421 /* Allocate optimal workspace */ 5422 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5423 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5424 } 5425 /* Now we can loop on constraining sets */ 5426 total_counts = 0; 5427 constraints_idxs_ptr[0] = 0; 5428 constraints_data_ptr[0] = 0; 5429 /* vertices */ 5430 if (n_vertices) { 5431 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5432 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5433 for (i=0;i<n_vertices;i++) { 5434 constraints_n[total_counts] = 1; 5435 constraints_data[total_counts] = 1.0; 5436 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5437 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5438 total_counts++; 5439 } 5440 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5441 n_vertices = total_counts; 5442 } 5443 5444 /* edges and faces */ 5445 total_counts_cc = total_counts; 5446 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5447 IS used_is; 5448 PetscBool idxs_copied = PETSC_FALSE; 5449 5450 if (ncc<n_ISForEdges) { 5451 used_is = ISForEdges[ncc]; 5452 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5453 } else { 5454 used_is = ISForFaces[ncc-n_ISForEdges]; 5455 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5456 } 5457 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5458 5459 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5460 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5461 /* change of basis should not be performed on local periodic nodes */ 5462 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5463 if (nnsp_has_cnst) { 5464 PetscScalar quad_value; 5465 5466 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5467 idxs_copied = PETSC_TRUE; 5468 5469 if (!pcbddc->use_nnsp_true) { 5470 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5471 } else { 5472 quad_value = 1.0; 5473 } 5474 for (j=0;j<size_of_constraint;j++) { 5475 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5476 } 5477 temp_constraints++; 5478 total_counts++; 5479 } 5480 for (k=0;k<nnsp_size;k++) { 5481 PetscReal real_value; 5482 PetscScalar *ptr_to_data; 5483 5484 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5485 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5486 for (j=0;j<size_of_constraint;j++) { 5487 ptr_to_data[j] = array[is_indices[j]]; 5488 } 5489 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5490 /* check if array is null on the connected component */ 5491 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5492 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5493 if (real_value > 0.0) { /* keep indices and values */ 5494 temp_constraints++; 5495 total_counts++; 5496 if (!idxs_copied) { 5497 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5498 idxs_copied = PETSC_TRUE; 5499 } 5500 } 5501 } 5502 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5503 valid_constraints = temp_constraints; 5504 if (!pcbddc->use_nnsp_true && temp_constraints) { 5505 if (temp_constraints == 1) { /* just normalize the constraint */ 5506 PetscScalar norm,*ptr_to_data; 5507 5508 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5509 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5510 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5511 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5512 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5513 } else { /* perform SVD */ 5514 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5515 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5516 5517 #if defined(PETSC_MISSING_LAPACK_GESVD) 5518 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5519 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5520 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5521 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5522 from that computed using LAPACKgesvd 5523 -> This is due to a different computation of eigenvectors in LAPACKheev 5524 -> The quality of the POD-computed basis will be the same */ 5525 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5526 /* Store upper triangular part of correlation matrix */ 5527 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5528 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5529 for (j=0;j<temp_constraints;j++) { 5530 for (k=0;k<j+1;k++) { 5531 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5532 } 5533 } 5534 /* compute eigenvalues and eigenvectors of correlation matrix */ 5535 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5536 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5537 #if !defined(PETSC_USE_COMPLEX) 5538 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5539 #else 5540 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5541 #endif 5542 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5543 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5544 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5545 j = 0; 5546 while (j < temp_constraints && singular_vals[j] < tol) j++; 5547 total_counts = total_counts-j; 5548 valid_constraints = temp_constraints-j; 5549 /* scale and copy POD basis into used quadrature memory */ 5550 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5551 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5552 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5553 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5554 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5555 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5556 if (j<temp_constraints) { 5557 PetscInt ii; 5558 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5559 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5560 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5561 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5562 for (k=0;k<temp_constraints-j;k++) { 5563 for (ii=0;ii<size_of_constraint;ii++) { 5564 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5565 } 5566 } 5567 } 5568 #else /* on missing GESVD */ 5569 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5570 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5571 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5572 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5573 #if !defined(PETSC_USE_COMPLEX) 5574 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5575 #else 5576 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5577 #endif 5578 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5579 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5580 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5581 k = temp_constraints; 5582 if (k > size_of_constraint) k = size_of_constraint; 5583 j = 0; 5584 while (j < k && singular_vals[k-j-1] < tol) j++; 5585 valid_constraints = k-j; 5586 total_counts = total_counts-temp_constraints+valid_constraints; 5587 #endif /* on missing GESVD */ 5588 } 5589 } 5590 /* update pointers information */ 5591 if (valid_constraints) { 5592 constraints_n[total_counts_cc] = valid_constraints; 5593 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5594 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5595 /* set change_of_basis flag */ 5596 if (boolforchange) { 5597 PetscBTSet(change_basis,total_counts_cc); 5598 } 5599 total_counts_cc++; 5600 } 5601 } 5602 /* free workspace */ 5603 if (!skip_lapack) { 5604 ierr = PetscFree(work);CHKERRQ(ierr); 5605 #if defined(PETSC_USE_COMPLEX) 5606 ierr = PetscFree(rwork);CHKERRQ(ierr); 5607 #endif 5608 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5609 #if defined(PETSC_MISSING_LAPACK_GESVD) 5610 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5611 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5612 #endif 5613 } 5614 for (k=0;k<nnsp_size;k++) { 5615 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5616 } 5617 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5618 /* free index sets of faces, edges and vertices */ 5619 for (i=0;i<n_ISForFaces;i++) { 5620 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5621 } 5622 if (n_ISForFaces) { 5623 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5624 } 5625 for (i=0;i<n_ISForEdges;i++) { 5626 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5627 } 5628 if (n_ISForEdges) { 5629 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5630 } 5631 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5632 } else { 5633 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5634 5635 total_counts = 0; 5636 n_vertices = 0; 5637 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5638 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5639 } 5640 max_constraints = 0; 5641 total_counts_cc = 0; 5642 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5643 total_counts += pcbddc->adaptive_constraints_n[i]; 5644 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5645 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5646 } 5647 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5648 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5649 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5650 constraints_data = pcbddc->adaptive_constraints_data; 5651 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5652 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5653 total_counts_cc = 0; 5654 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5655 if (pcbddc->adaptive_constraints_n[i]) { 5656 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5657 } 5658 } 5659 #if 0 5660 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5661 for (i=0;i<total_counts_cc;i++) { 5662 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5663 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5664 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5665 printf(" %d",constraints_idxs[j]); 5666 } 5667 printf("\n"); 5668 printf("number of cc: %d\n",constraints_n[i]); 5669 } 5670 for (i=0;i<n_vertices;i++) { 5671 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5672 } 5673 for (i=0;i<sub_schurs->n_subs;i++) { 5674 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5675 } 5676 #endif 5677 5678 max_size_of_constraint = 0; 5679 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5680 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5681 /* Change of basis */ 5682 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5683 if (pcbddc->use_change_of_basis) { 5684 for (i=0;i<sub_schurs->n_subs;i++) { 5685 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5686 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5687 } 5688 } 5689 } 5690 } 5691 pcbddc->local_primal_size = total_counts; 5692 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5693 5694 /* map constraints_idxs in boundary numbering */ 5695 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5696 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5697 5698 /* Create constraint matrix */ 5699 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5700 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5701 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5702 5703 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5704 /* determine if a QR strategy is needed for change of basis */ 5705 qr_needed = PETSC_FALSE; 5706 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5707 total_primal_vertices=0; 5708 pcbddc->local_primal_size_cc = 0; 5709 for (i=0;i<total_counts_cc;i++) { 5710 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5711 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5712 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5713 pcbddc->local_primal_size_cc += 1; 5714 } else if (PetscBTLookup(change_basis,i)) { 5715 for (k=0;k<constraints_n[i];k++) { 5716 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5717 } 5718 pcbddc->local_primal_size_cc += constraints_n[i]; 5719 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5720 PetscBTSet(qr_needed_idx,i); 5721 qr_needed = PETSC_TRUE; 5722 } 5723 } else { 5724 pcbddc->local_primal_size_cc += 1; 5725 } 5726 } 5727 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5728 pcbddc->n_vertices = total_primal_vertices; 5729 /* permute indices in order to have a sorted set of vertices */ 5730 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5731 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5732 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5733 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5734 5735 /* nonzero structure of constraint matrix */ 5736 /* and get reference dof for local constraints */ 5737 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5738 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5739 5740 j = total_primal_vertices; 5741 total_counts = total_primal_vertices; 5742 cum = total_primal_vertices; 5743 for (i=n_vertices;i<total_counts_cc;i++) { 5744 if (!PetscBTLookup(change_basis,i)) { 5745 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5746 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5747 cum++; 5748 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5749 for (k=0;k<constraints_n[i];k++) { 5750 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5751 nnz[j+k] = size_of_constraint; 5752 } 5753 j += constraints_n[i]; 5754 } 5755 } 5756 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5757 ierr = PetscFree(nnz);CHKERRQ(ierr); 5758 5759 /* set values in constraint matrix */ 5760 for (i=0;i<total_primal_vertices;i++) { 5761 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5762 } 5763 total_counts = total_primal_vertices; 5764 for (i=n_vertices;i<total_counts_cc;i++) { 5765 if (!PetscBTLookup(change_basis,i)) { 5766 PetscInt *cols; 5767 5768 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5769 cols = constraints_idxs+constraints_idxs_ptr[i]; 5770 for (k=0;k<constraints_n[i];k++) { 5771 PetscInt row = total_counts+k; 5772 PetscScalar *vals; 5773 5774 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5775 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5776 } 5777 total_counts += constraints_n[i]; 5778 } 5779 } 5780 /* assembling */ 5781 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5782 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5783 5784 /* 5785 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5786 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5787 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5788 */ 5789 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5790 if (pcbddc->use_change_of_basis) { 5791 /* dual and primal dofs on a single cc */ 5792 PetscInt dual_dofs,primal_dofs; 5793 /* working stuff for GEQRF */ 5794 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5795 PetscBLASInt lqr_work; 5796 /* working stuff for UNGQR */ 5797 PetscScalar *gqr_work,lgqr_work_t; 5798 PetscBLASInt lgqr_work; 5799 /* working stuff for TRTRS */ 5800 PetscScalar *trs_rhs; 5801 PetscBLASInt Blas_NRHS; 5802 /* pointers for values insertion into change of basis matrix */ 5803 PetscInt *start_rows,*start_cols; 5804 PetscScalar *start_vals; 5805 /* working stuff for values insertion */ 5806 PetscBT is_primal; 5807 PetscInt *aux_primal_numbering_B; 5808 /* matrix sizes */ 5809 PetscInt global_size,local_size; 5810 /* temporary change of basis */ 5811 Mat localChangeOfBasisMatrix; 5812 /* extra space for debugging */ 5813 PetscScalar *dbg_work; 5814 5815 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5816 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5817 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5818 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5819 /* nonzeros for local mat */ 5820 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5821 if (!pcbddc->benign_change || pcbddc->fake_change) { 5822 for (i=0;i<pcis->n;i++) nnz[i]=1; 5823 } else { 5824 const PetscInt *ii; 5825 PetscInt n; 5826 PetscBool flg_row; 5827 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5828 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5829 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5830 } 5831 for (i=n_vertices;i<total_counts_cc;i++) { 5832 if (PetscBTLookup(change_basis,i)) { 5833 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5834 if (PetscBTLookup(qr_needed_idx,i)) { 5835 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5836 } else { 5837 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5838 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5839 } 5840 } 5841 } 5842 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5843 ierr = PetscFree(nnz);CHKERRQ(ierr); 5844 /* Set interior change in the matrix */ 5845 if (!pcbddc->benign_change || pcbddc->fake_change) { 5846 for (i=0;i<pcis->n;i++) { 5847 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5848 } 5849 } else { 5850 const PetscInt *ii,*jj; 5851 PetscScalar *aa; 5852 PetscInt n; 5853 PetscBool flg_row; 5854 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5855 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5856 for (i=0;i<n;i++) { 5857 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5858 } 5859 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5860 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5861 } 5862 5863 if (pcbddc->dbg_flag) { 5864 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5865 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5866 } 5867 5868 5869 /* Now we loop on the constraints which need a change of basis */ 5870 /* 5871 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5872 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5873 5874 Basic blocks of change of basis matrix T computed by 5875 5876 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5877 5878 | 1 0 ... 0 s_1/S | 5879 | 0 1 ... 0 s_2/S | 5880 | ... | 5881 | 0 ... 1 s_{n-1}/S | 5882 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5883 5884 with S = \sum_{i=1}^n s_i^2 5885 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5886 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5887 5888 - QR decomposition of constraints otherwise 5889 */ 5890 if (qr_needed) { 5891 /* space to store Q */ 5892 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5893 /* array to store scaling factors for reflectors */ 5894 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5895 /* first we issue queries for optimal work */ 5896 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5897 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5898 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5899 lqr_work = -1; 5900 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5901 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5902 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5903 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5904 lgqr_work = -1; 5905 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5906 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5907 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5908 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5909 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5910 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5911 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5912 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5913 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5914 /* array to store rhs and solution of triangular solver */ 5915 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5916 /* allocating workspace for check */ 5917 if (pcbddc->dbg_flag) { 5918 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5919 } 5920 } 5921 /* array to store whether a node is primal or not */ 5922 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5923 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5924 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5925 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5926 for (i=0;i<total_primal_vertices;i++) { 5927 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5928 } 5929 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5930 5931 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5932 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5933 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5934 if (PetscBTLookup(change_basis,total_counts)) { 5935 /* get constraint info */ 5936 primal_dofs = constraints_n[total_counts]; 5937 dual_dofs = size_of_constraint-primal_dofs; 5938 5939 if (pcbddc->dbg_flag) { 5940 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5941 } 5942 5943 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5944 5945 /* copy quadrature constraints for change of basis check */ 5946 if (pcbddc->dbg_flag) { 5947 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5948 } 5949 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5950 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5951 5952 /* compute QR decomposition of constraints */ 5953 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5954 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5955 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5956 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5957 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5958 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5959 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5960 5961 /* explictly compute R^-T */ 5962 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5963 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5964 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5965 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5966 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5967 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5968 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5969 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5970 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5971 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5972 5973 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5974 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5975 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5976 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5977 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5978 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5979 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 5980 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 5981 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5982 5983 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 5984 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 5985 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 5986 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5987 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5988 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5989 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5990 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5991 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5992 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5993 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 5994 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5995 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5996 5997 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 5998 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 5999 /* insert cols for primal dofs */ 6000 for (j=0;j<primal_dofs;j++) { 6001 start_vals = &qr_basis[j*size_of_constraint]; 6002 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6003 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6004 } 6005 /* insert cols for dual dofs */ 6006 for (j=0,k=0;j<dual_dofs;k++) { 6007 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6008 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6009 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6010 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6011 j++; 6012 } 6013 } 6014 6015 /* check change of basis */ 6016 if (pcbddc->dbg_flag) { 6017 PetscInt ii,jj; 6018 PetscBool valid_qr=PETSC_TRUE; 6019 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6020 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6021 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6022 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6023 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6024 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6025 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6026 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6027 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6028 for (jj=0;jj<size_of_constraint;jj++) { 6029 for (ii=0;ii<primal_dofs;ii++) { 6030 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6031 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 6032 } 6033 } 6034 if (!valid_qr) { 6035 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6036 for (jj=0;jj<size_of_constraint;jj++) { 6037 for (ii=0;ii<primal_dofs;ii++) { 6038 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6039 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6040 } 6041 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 6042 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6043 } 6044 } 6045 } 6046 } else { 6047 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6048 } 6049 } 6050 } else { /* simple transformation block */ 6051 PetscInt row,col; 6052 PetscScalar val,norm; 6053 6054 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6055 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6056 for (j=0;j<size_of_constraint;j++) { 6057 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6058 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6059 if (!PetscBTLookup(is_primal,row_B)) { 6060 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6061 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6062 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6063 } else { 6064 for (k=0;k<size_of_constraint;k++) { 6065 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6066 if (row != col) { 6067 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6068 } else { 6069 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6070 } 6071 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6072 } 6073 } 6074 } 6075 if (pcbddc->dbg_flag) { 6076 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6077 } 6078 } 6079 } else { 6080 if (pcbddc->dbg_flag) { 6081 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6082 } 6083 } 6084 } 6085 6086 /* free workspace */ 6087 if (qr_needed) { 6088 if (pcbddc->dbg_flag) { 6089 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6090 } 6091 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6092 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6093 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6094 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6095 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6096 } 6097 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6098 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6099 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6100 6101 /* assembling of global change of variable */ 6102 if (!pcbddc->fake_change) { 6103 Mat tmat; 6104 PetscInt bs; 6105 6106 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6107 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6108 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6109 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6110 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6111 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6112 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6113 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6114 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6115 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6116 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6117 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6118 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6119 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6120 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6121 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6122 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6123 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6124 6125 /* check */ 6126 if (pcbddc->dbg_flag) { 6127 PetscReal error; 6128 Vec x,x_change; 6129 6130 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6131 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6132 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6133 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6134 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6135 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6136 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6137 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6138 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6139 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6140 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6141 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6142 if (error > PETSC_SMALL) { 6143 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6144 } 6145 ierr = VecDestroy(&x);CHKERRQ(ierr); 6146 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6147 } 6148 /* adapt sub_schurs computed (if any) */ 6149 if (pcbddc->use_deluxe_scaling) { 6150 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6151 6152 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 6153 if (sub_schurs && sub_schurs->S_Ej_all) { 6154 Mat S_new,tmat; 6155 IS is_all_N,is_V_Sall = NULL; 6156 6157 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6158 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6159 if (pcbddc->deluxe_zerorows) { 6160 ISLocalToGlobalMapping NtoSall; 6161 IS is_V; 6162 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6163 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6164 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6165 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6166 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6167 } 6168 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6169 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6170 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6171 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6172 if (pcbddc->deluxe_zerorows) { 6173 const PetscScalar *array; 6174 const PetscInt *idxs_V,*idxs_all; 6175 PetscInt i,n_V; 6176 6177 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6178 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6179 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6180 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6181 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6182 for (i=0;i<n_V;i++) { 6183 PetscScalar val; 6184 PetscInt idx; 6185 6186 idx = idxs_V[i]; 6187 val = array[idxs_all[idxs_V[i]]]; 6188 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6189 } 6190 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6191 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6192 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6193 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6194 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6195 } 6196 sub_schurs->S_Ej_all = S_new; 6197 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6198 if (sub_schurs->sum_S_Ej_all) { 6199 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6200 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6201 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6202 if (pcbddc->deluxe_zerorows) { 6203 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6204 } 6205 sub_schurs->sum_S_Ej_all = S_new; 6206 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6207 } 6208 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6209 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6210 } 6211 /* destroy any change of basis context in sub_schurs */ 6212 if (sub_schurs && sub_schurs->change) { 6213 PetscInt i; 6214 6215 for (i=0;i<sub_schurs->n_subs;i++) { 6216 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6217 } 6218 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6219 } 6220 } 6221 if (pcbddc->switch_static) { /* need to save the local change */ 6222 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6223 } else { 6224 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6225 } 6226 /* determine if any process has changed the pressures locally */ 6227 pcbddc->change_interior = pcbddc->benign_have_null; 6228 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6229 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6230 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6231 pcbddc->use_qr_single = qr_needed; 6232 } 6233 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6234 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6235 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6236 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6237 } else { 6238 Mat benign_global = NULL; 6239 if (pcbddc->benign_have_null) { 6240 Mat tmat; 6241 6242 pcbddc->change_interior = PETSC_TRUE; 6243 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6244 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6245 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6246 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6247 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6248 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6249 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6250 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6251 if (pcbddc->benign_change) { 6252 Mat M; 6253 6254 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6255 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6256 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6257 ierr = MatDestroy(&M);CHKERRQ(ierr); 6258 } else { 6259 Mat eye; 6260 PetscScalar *array; 6261 6262 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6263 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6264 for (i=0;i<pcis->n;i++) { 6265 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6266 } 6267 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6268 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6269 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6270 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6271 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6272 } 6273 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6274 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6275 } 6276 if (pcbddc->user_ChangeOfBasisMatrix) { 6277 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6278 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6279 } else if (pcbddc->benign_have_null) { 6280 pcbddc->ChangeOfBasisMatrix = benign_global; 6281 } 6282 } 6283 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6284 IS is_global; 6285 const PetscInt *gidxs; 6286 6287 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6288 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6289 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6290 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6291 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6292 } 6293 } 6294 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6295 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6296 } 6297 6298 if (!pcbddc->fake_change) { 6299 /* add pressure dofs to set of primal nodes for numbering purposes */ 6300 for (i=0;i<pcbddc->benign_n;i++) { 6301 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6302 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6303 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6304 pcbddc->local_primal_size_cc++; 6305 pcbddc->local_primal_size++; 6306 } 6307 6308 /* check if a new primal space has been introduced (also take into account benign trick) */ 6309 pcbddc->new_primal_space_local = PETSC_TRUE; 6310 if (olocal_primal_size == pcbddc->local_primal_size) { 6311 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6312 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6313 if (!pcbddc->new_primal_space_local) { 6314 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6315 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6316 } 6317 } 6318 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6319 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6320 } 6321 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6322 6323 /* flush dbg viewer */ 6324 if (pcbddc->dbg_flag) { 6325 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6326 } 6327 6328 /* free workspace */ 6329 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6330 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6331 if (!pcbddc->adaptive_selection) { 6332 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6333 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6334 } else { 6335 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6336 pcbddc->adaptive_constraints_idxs_ptr, 6337 pcbddc->adaptive_constraints_data_ptr, 6338 pcbddc->adaptive_constraints_idxs, 6339 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6340 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6341 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6342 } 6343 PetscFunctionReturn(0); 6344 } 6345 6346 #undef __FUNCT__ 6347 #define __FUNCT__ "PCBDDCAnalyzeInterface" 6348 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6349 { 6350 ISLocalToGlobalMapping map; 6351 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6352 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6353 PetscInt i,N; 6354 PetscBool rcsr = PETSC_FALSE; 6355 PetscErrorCode ierr; 6356 6357 PetscFunctionBegin; 6358 if (pcbddc->recompute_topography) { 6359 pcbddc->graphanalyzed = PETSC_FALSE; 6360 /* Reset previously computed graph */ 6361 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6362 /* Init local Graph struct */ 6363 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6364 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6365 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6366 6367 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6368 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6369 } 6370 /* Check validity of the csr graph passed in by the user */ 6371 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6372 6373 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6374 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6375 PetscInt *xadj,*adjncy; 6376 PetscInt nvtxs; 6377 PetscBool flg_row=PETSC_FALSE; 6378 6379 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6380 if (flg_row) { 6381 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6382 pcbddc->computed_rowadj = PETSC_TRUE; 6383 } 6384 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6385 rcsr = PETSC_TRUE; 6386 } 6387 if (pcbddc->dbg_flag) { 6388 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6389 } 6390 6391 /* Setup of Graph */ 6392 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6393 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6394 6395 /* attach info on disconnected subdomains if present */ 6396 if (pcbddc->n_local_subs) { 6397 PetscInt *local_subs; 6398 6399 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6400 for (i=0;i<pcbddc->n_local_subs;i++) { 6401 const PetscInt *idxs; 6402 PetscInt nl,j; 6403 6404 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6405 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6406 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6407 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6408 } 6409 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6410 pcbddc->mat_graph->local_subs = local_subs; 6411 } 6412 } 6413 6414 if (!pcbddc->graphanalyzed) { 6415 /* Graph's connected components analysis */ 6416 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6417 pcbddc->graphanalyzed = PETSC_TRUE; 6418 } 6419 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6420 PetscFunctionReturn(0); 6421 } 6422 6423 #undef __FUNCT__ 6424 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 6425 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6426 { 6427 PetscInt i,j; 6428 PetscScalar *alphas; 6429 PetscErrorCode ierr; 6430 6431 PetscFunctionBegin; 6432 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6433 for (i=0;i<n;i++) { 6434 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6435 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6436 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6437 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6438 } 6439 ierr = PetscFree(alphas);CHKERRQ(ierr); 6440 PetscFunctionReturn(0); 6441 } 6442 6443 #undef __FUNCT__ 6444 #define __FUNCT__ "PCBDDCMatISGetSubassemblingPattern" 6445 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6446 { 6447 Mat A; 6448 PetscInt n_neighs,*neighs,*n_shared,**shared; 6449 PetscMPIInt size,rank,color; 6450 PetscInt *xadj,*adjncy; 6451 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6452 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 6453 PetscInt void_procs,*procs_candidates = NULL; 6454 PetscInt xadj_count,*count; 6455 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6456 PetscSubcomm psubcomm; 6457 MPI_Comm subcomm; 6458 PetscErrorCode ierr; 6459 6460 PetscFunctionBegin; 6461 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6462 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6463 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6464 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6465 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6466 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6467 6468 if (have_void) *have_void = PETSC_FALSE; 6469 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6470 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6471 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6472 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6473 im_active = !!n; 6474 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6475 void_procs = size - active_procs; 6476 /* get ranks of of non-active processes in mat communicator */ 6477 if (void_procs) { 6478 PetscInt ncand; 6479 6480 if (have_void) *have_void = PETSC_TRUE; 6481 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6482 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6483 for (i=0,ncand=0;i<size;i++) { 6484 if (!procs_candidates[i]) { 6485 procs_candidates[ncand++] = i; 6486 } 6487 } 6488 /* force n_subdomains to be not greater that the number of non-active processes */ 6489 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6490 } 6491 6492 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 6493 number of subdomains requested 1 -> send to master or first candidate in voids */ 6494 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 6495 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 6496 PetscInt issize,isidx,dest; 6497 if (*n_subdomains == 1) dest = 0; 6498 else dest = rank; 6499 if (im_active) { 6500 issize = 1; 6501 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6502 isidx = procs_candidates[dest]; 6503 } else { 6504 isidx = dest; 6505 } 6506 } else { 6507 issize = 0; 6508 isidx = -1; 6509 } 6510 if (*n_subdomains != 1) *n_subdomains = active_procs; 6511 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6512 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6513 PetscFunctionReturn(0); 6514 } 6515 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6516 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6517 threshold = PetscMax(threshold,2); 6518 6519 /* Get info on mapping */ 6520 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6521 6522 /* build local CSR graph of subdomains' connectivity */ 6523 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6524 xadj[0] = 0; 6525 xadj[1] = PetscMax(n_neighs-1,0); 6526 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6527 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6528 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 6529 for (i=1;i<n_neighs;i++) 6530 for (j=0;j<n_shared[i];j++) 6531 count[shared[i][j]] += 1; 6532 6533 xadj_count = 0; 6534 for (i=1;i<n_neighs;i++) { 6535 for (j=0;j<n_shared[i];j++) { 6536 if (count[shared[i][j]] < threshold) { 6537 adjncy[xadj_count] = neighs[i]; 6538 adjncy_wgt[xadj_count] = n_shared[i]; 6539 xadj_count++; 6540 break; 6541 } 6542 } 6543 } 6544 xadj[1] = xadj_count; 6545 ierr = PetscFree(count);CHKERRQ(ierr); 6546 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6547 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6548 6549 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6550 6551 /* Restrict work on active processes only */ 6552 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6553 if (void_procs) { 6554 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6555 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6556 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6557 subcomm = PetscSubcommChild(psubcomm); 6558 } else { 6559 psubcomm = NULL; 6560 subcomm = PetscObjectComm((PetscObject)mat); 6561 } 6562 6563 v_wgt = NULL; 6564 if (!color) { 6565 ierr = PetscFree(xadj);CHKERRQ(ierr); 6566 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6567 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6568 } else { 6569 Mat subdomain_adj; 6570 IS new_ranks,new_ranks_contig; 6571 MatPartitioning partitioner; 6572 PetscInt rstart=0,rend=0; 6573 PetscInt *is_indices,*oldranks; 6574 PetscMPIInt size; 6575 PetscBool aggregate; 6576 6577 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6578 if (void_procs) { 6579 PetscInt prank = rank; 6580 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6581 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6582 for (i=0;i<xadj[1];i++) { 6583 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6584 } 6585 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6586 } else { 6587 oldranks = NULL; 6588 } 6589 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6590 if (aggregate) { /* TODO: all this part could be made more efficient */ 6591 PetscInt lrows,row,ncols,*cols; 6592 PetscMPIInt nrank; 6593 PetscScalar *vals; 6594 6595 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6596 lrows = 0; 6597 if (nrank<redprocs) { 6598 lrows = size/redprocs; 6599 if (nrank<size%redprocs) lrows++; 6600 } 6601 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6602 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6603 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6604 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6605 row = nrank; 6606 ncols = xadj[1]-xadj[0]; 6607 cols = adjncy; 6608 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6609 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6610 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6611 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6612 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6613 ierr = PetscFree(xadj);CHKERRQ(ierr); 6614 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6615 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6616 ierr = PetscFree(vals);CHKERRQ(ierr); 6617 if (use_vwgt) { 6618 Vec v; 6619 const PetscScalar *array; 6620 PetscInt nl; 6621 6622 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6623 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 6624 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6625 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6626 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6627 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6628 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6629 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6630 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6631 ierr = VecDestroy(&v);CHKERRQ(ierr); 6632 } 6633 } else { 6634 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6635 if (use_vwgt) { 6636 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6637 v_wgt[0] = n; 6638 } 6639 } 6640 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6641 6642 /* Partition */ 6643 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6644 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6645 if (v_wgt) { 6646 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6647 } 6648 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6649 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6650 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6651 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6652 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6653 6654 /* renumber new_ranks to avoid "holes" in new set of processors */ 6655 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6656 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6657 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6658 if (!aggregate) { 6659 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6660 #if defined(PETSC_USE_DEBUG) 6661 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6662 #endif 6663 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6664 } else if (oldranks) { 6665 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6666 } else { 6667 ranks_send_to_idx[0] = is_indices[0]; 6668 } 6669 } else { 6670 PetscInt idxs[1]; 6671 PetscMPIInt tag; 6672 MPI_Request *reqs; 6673 6674 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6675 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6676 for (i=rstart;i<rend;i++) { 6677 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6678 } 6679 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6680 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6681 ierr = PetscFree(reqs);CHKERRQ(ierr); 6682 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6683 #if defined(PETSC_USE_DEBUG) 6684 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6685 #endif 6686 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6687 } else if (oldranks) { 6688 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6689 } else { 6690 ranks_send_to_idx[0] = idxs[0]; 6691 } 6692 } 6693 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6694 /* clean up */ 6695 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6696 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6697 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6698 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6699 } 6700 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6701 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6702 6703 /* assemble parallel IS for sends */ 6704 i = 1; 6705 if (!color) i=0; 6706 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6707 PetscFunctionReturn(0); 6708 } 6709 6710 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6711 6712 #undef __FUNCT__ 6713 #define __FUNCT__ "PCBDDCMatISSubassemble" 6714 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6715 { 6716 Mat local_mat; 6717 IS is_sends_internal; 6718 PetscInt rows,cols,new_local_rows; 6719 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6720 PetscBool ismatis,isdense,newisdense,destroy_mat; 6721 ISLocalToGlobalMapping l2gmap; 6722 PetscInt* l2gmap_indices; 6723 const PetscInt* is_indices; 6724 MatType new_local_type; 6725 /* buffers */ 6726 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6727 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6728 PetscInt *recv_buffer_idxs_local; 6729 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6730 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6731 /* MPI */ 6732 MPI_Comm comm,comm_n; 6733 PetscSubcomm subcomm; 6734 PetscMPIInt n_sends,n_recvs,commsize; 6735 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6736 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6737 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6738 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6739 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6740 PetscErrorCode ierr; 6741 6742 PetscFunctionBegin; 6743 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6744 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6745 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6746 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6747 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6748 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6749 PetscValidLogicalCollectiveBool(mat,reuse,6); 6750 PetscValidLogicalCollectiveInt(mat,nis,8); 6751 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6752 if (nvecs) { 6753 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6754 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6755 } 6756 /* further checks */ 6757 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6758 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6759 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6760 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6761 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6762 if (reuse && *mat_n) { 6763 PetscInt mrows,mcols,mnrows,mncols; 6764 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6765 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6766 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6767 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6768 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6769 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6770 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6771 } 6772 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6773 PetscValidLogicalCollectiveInt(mat,bs,0); 6774 6775 /* prepare IS for sending if not provided */ 6776 if (!is_sends) { 6777 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6778 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6779 } else { 6780 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6781 is_sends_internal = is_sends; 6782 } 6783 6784 /* get comm */ 6785 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6786 6787 /* compute number of sends */ 6788 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6789 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6790 6791 /* compute number of receives */ 6792 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6793 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6794 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6795 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6796 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6797 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6798 ierr = PetscFree(iflags);CHKERRQ(ierr); 6799 6800 /* restrict comm if requested */ 6801 subcomm = 0; 6802 destroy_mat = PETSC_FALSE; 6803 if (restrict_comm) { 6804 PetscMPIInt color,subcommsize; 6805 6806 color = 0; 6807 if (restrict_full) { 6808 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6809 } else { 6810 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6811 } 6812 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6813 subcommsize = commsize - subcommsize; 6814 /* check if reuse has been requested */ 6815 if (reuse) { 6816 if (*mat_n) { 6817 PetscMPIInt subcommsize2; 6818 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6819 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6820 comm_n = PetscObjectComm((PetscObject)*mat_n); 6821 } else { 6822 comm_n = PETSC_COMM_SELF; 6823 } 6824 } else { /* MAT_INITIAL_MATRIX */ 6825 PetscMPIInt rank; 6826 6827 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6828 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6829 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6830 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6831 comm_n = PetscSubcommChild(subcomm); 6832 } 6833 /* flag to destroy *mat_n if not significative */ 6834 if (color) destroy_mat = PETSC_TRUE; 6835 } else { 6836 comm_n = comm; 6837 } 6838 6839 /* prepare send/receive buffers */ 6840 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6841 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6842 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6843 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6844 if (nis) { 6845 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6846 } 6847 6848 /* Get data from local matrices */ 6849 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6850 /* TODO: See below some guidelines on how to prepare the local buffers */ 6851 /* 6852 send_buffer_vals should contain the raw values of the local matrix 6853 send_buffer_idxs should contain: 6854 - MatType_PRIVATE type 6855 - PetscInt size_of_l2gmap 6856 - PetscInt global_row_indices[size_of_l2gmap] 6857 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6858 */ 6859 else { 6860 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6861 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6862 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6863 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6864 send_buffer_idxs[1] = i; 6865 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6866 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6867 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6868 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6869 for (i=0;i<n_sends;i++) { 6870 ilengths_vals[is_indices[i]] = len*len; 6871 ilengths_idxs[is_indices[i]] = len+2; 6872 } 6873 } 6874 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6875 /* additional is (if any) */ 6876 if (nis) { 6877 PetscMPIInt psum; 6878 PetscInt j; 6879 for (j=0,psum=0;j<nis;j++) { 6880 PetscInt plen; 6881 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6882 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6883 psum += len+1; /* indices + lenght */ 6884 } 6885 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6886 for (j=0,psum=0;j<nis;j++) { 6887 PetscInt plen; 6888 const PetscInt *is_array_idxs; 6889 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6890 send_buffer_idxs_is[psum] = plen; 6891 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6892 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6893 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6894 psum += plen+1; /* indices + lenght */ 6895 } 6896 for (i=0;i<n_sends;i++) { 6897 ilengths_idxs_is[is_indices[i]] = psum; 6898 } 6899 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6900 } 6901 6902 buf_size_idxs = 0; 6903 buf_size_vals = 0; 6904 buf_size_idxs_is = 0; 6905 buf_size_vecs = 0; 6906 for (i=0;i<n_recvs;i++) { 6907 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6908 buf_size_vals += (PetscInt)olengths_vals[i]; 6909 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6910 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6911 } 6912 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6913 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6914 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6915 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6916 6917 /* get new tags for clean communications */ 6918 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6919 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6920 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6921 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6922 6923 /* allocate for requests */ 6924 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6925 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6926 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6927 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6928 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6929 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6930 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6931 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6932 6933 /* communications */ 6934 ptr_idxs = recv_buffer_idxs; 6935 ptr_vals = recv_buffer_vals; 6936 ptr_idxs_is = recv_buffer_idxs_is; 6937 ptr_vecs = recv_buffer_vecs; 6938 for (i=0;i<n_recvs;i++) { 6939 source_dest = onodes[i]; 6940 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6941 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6942 ptr_idxs += olengths_idxs[i]; 6943 ptr_vals += olengths_vals[i]; 6944 if (nis) { 6945 source_dest = onodes_is[i]; 6946 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6947 ptr_idxs_is += olengths_idxs_is[i]; 6948 } 6949 if (nvecs) { 6950 source_dest = onodes[i]; 6951 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6952 ptr_vecs += olengths_idxs[i]-2; 6953 } 6954 } 6955 for (i=0;i<n_sends;i++) { 6956 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6957 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6958 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6959 if (nis) { 6960 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6961 } 6962 if (nvecs) { 6963 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6964 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6965 } 6966 } 6967 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6968 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6969 6970 /* assemble new l2g map */ 6971 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6972 ptr_idxs = recv_buffer_idxs; 6973 new_local_rows = 0; 6974 for (i=0;i<n_recvs;i++) { 6975 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6976 ptr_idxs += olengths_idxs[i]; 6977 } 6978 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6979 ptr_idxs = recv_buffer_idxs; 6980 new_local_rows = 0; 6981 for (i=0;i<n_recvs;i++) { 6982 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6983 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6984 ptr_idxs += olengths_idxs[i]; 6985 } 6986 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 6987 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 6988 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 6989 6990 /* infer new local matrix type from received local matrices type */ 6991 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 6992 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 6993 if (n_recvs) { 6994 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 6995 ptr_idxs = recv_buffer_idxs; 6996 for (i=0;i<n_recvs;i++) { 6997 if ((PetscInt)new_local_type_private != *ptr_idxs) { 6998 new_local_type_private = MATAIJ_PRIVATE; 6999 break; 7000 } 7001 ptr_idxs += olengths_idxs[i]; 7002 } 7003 switch (new_local_type_private) { 7004 case MATDENSE_PRIVATE: 7005 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 7006 new_local_type = MATSEQAIJ; 7007 bs = 1; 7008 } else { /* if I receive only 1 dense matrix */ 7009 new_local_type = MATSEQDENSE; 7010 bs = 1; 7011 } 7012 break; 7013 case MATAIJ_PRIVATE: 7014 new_local_type = MATSEQAIJ; 7015 bs = 1; 7016 break; 7017 case MATBAIJ_PRIVATE: 7018 new_local_type = MATSEQBAIJ; 7019 break; 7020 case MATSBAIJ_PRIVATE: 7021 new_local_type = MATSEQSBAIJ; 7022 break; 7023 default: 7024 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 7025 break; 7026 } 7027 } else { /* by default, new_local_type is seqdense */ 7028 new_local_type = MATSEQDENSE; 7029 bs = 1; 7030 } 7031 7032 /* create MATIS object if needed */ 7033 if (!reuse) { 7034 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7035 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7036 } else { 7037 /* it also destroys the local matrices */ 7038 if (*mat_n) { 7039 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7040 } else { /* this is a fake object */ 7041 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7042 } 7043 } 7044 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7045 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7046 7047 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7048 7049 /* Global to local map of received indices */ 7050 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7051 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7052 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7053 7054 /* restore attributes -> type of incoming data and its size */ 7055 buf_size_idxs = 0; 7056 for (i=0;i<n_recvs;i++) { 7057 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7058 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7059 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7060 } 7061 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7062 7063 /* set preallocation */ 7064 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7065 if (!newisdense) { 7066 PetscInt *new_local_nnz=0; 7067 7068 ptr_idxs = recv_buffer_idxs_local; 7069 if (n_recvs) { 7070 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7071 } 7072 for (i=0;i<n_recvs;i++) { 7073 PetscInt j; 7074 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7075 for (j=0;j<*(ptr_idxs+1);j++) { 7076 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7077 } 7078 } else { 7079 /* TODO */ 7080 } 7081 ptr_idxs += olengths_idxs[i]; 7082 } 7083 if (new_local_nnz) { 7084 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7085 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7086 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7087 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7088 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7089 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7090 } else { 7091 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7092 } 7093 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7094 } else { 7095 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7096 } 7097 7098 /* set values */ 7099 ptr_vals = recv_buffer_vals; 7100 ptr_idxs = recv_buffer_idxs_local; 7101 for (i=0;i<n_recvs;i++) { 7102 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7103 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7104 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7105 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7106 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7107 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7108 } else { 7109 /* TODO */ 7110 } 7111 ptr_idxs += olengths_idxs[i]; 7112 ptr_vals += olengths_vals[i]; 7113 } 7114 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7115 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7116 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7117 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7118 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7119 7120 #if 0 7121 if (!restrict_comm) { /* check */ 7122 Vec lvec,rvec; 7123 PetscReal infty_error; 7124 7125 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7126 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7127 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7128 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7129 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7130 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7131 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7132 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7133 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7134 } 7135 #endif 7136 7137 /* assemble new additional is (if any) */ 7138 if (nis) { 7139 PetscInt **temp_idxs,*count_is,j,psum; 7140 7141 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7142 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7143 ptr_idxs = recv_buffer_idxs_is; 7144 psum = 0; 7145 for (i=0;i<n_recvs;i++) { 7146 for (j=0;j<nis;j++) { 7147 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7148 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7149 psum += plen; 7150 ptr_idxs += plen+1; /* shift pointer to received data */ 7151 } 7152 } 7153 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7154 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7155 for (i=1;i<nis;i++) { 7156 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7157 } 7158 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7159 ptr_idxs = recv_buffer_idxs_is; 7160 for (i=0;i<n_recvs;i++) { 7161 for (j=0;j<nis;j++) { 7162 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7163 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7164 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7165 ptr_idxs += plen+1; /* shift pointer to received data */ 7166 } 7167 } 7168 for (i=0;i<nis;i++) { 7169 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7170 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7171 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7172 } 7173 ierr = PetscFree(count_is);CHKERRQ(ierr); 7174 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7175 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7176 } 7177 /* free workspace */ 7178 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7179 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7180 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7181 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7182 if (isdense) { 7183 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7184 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7185 } else { 7186 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7187 } 7188 if (nis) { 7189 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7190 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7191 } 7192 7193 if (nvecs) { 7194 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7195 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7196 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7197 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7198 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7199 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7200 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7201 /* set values */ 7202 ptr_vals = recv_buffer_vecs; 7203 ptr_idxs = recv_buffer_idxs_local; 7204 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7205 for (i=0;i<n_recvs;i++) { 7206 PetscInt j; 7207 for (j=0;j<*(ptr_idxs+1);j++) { 7208 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7209 } 7210 ptr_idxs += olengths_idxs[i]; 7211 ptr_vals += olengths_idxs[i]-2; 7212 } 7213 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7214 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7215 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7216 } 7217 7218 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7219 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7220 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7221 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7222 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7223 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7224 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7225 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7226 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7227 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7228 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7229 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7230 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7231 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7232 ierr = PetscFree(onodes);CHKERRQ(ierr); 7233 if (nis) { 7234 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7235 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7236 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7237 } 7238 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7239 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7240 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7241 for (i=0;i<nis;i++) { 7242 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7243 } 7244 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7245 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7246 } 7247 *mat_n = NULL; 7248 } 7249 PetscFunctionReturn(0); 7250 } 7251 7252 /* temporary hack into ksp private data structure */ 7253 #include <petsc/private/kspimpl.h> 7254 7255 #undef __FUNCT__ 7256 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 7257 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7258 { 7259 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7260 PC_IS *pcis = (PC_IS*)pc->data; 7261 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7262 Mat coarsedivudotp = NULL; 7263 Mat coarseG,t_coarse_mat_is; 7264 MatNullSpace CoarseNullSpace = NULL; 7265 ISLocalToGlobalMapping coarse_islg; 7266 IS coarse_is,*isarray; 7267 PetscInt i,im_active=-1,active_procs=-1; 7268 PetscInt nis,nisdofs,nisneu,nisvert; 7269 PC pc_temp; 7270 PCType coarse_pc_type; 7271 KSPType coarse_ksp_type; 7272 PetscBool multilevel_requested,multilevel_allowed; 7273 PetscBool isredundant,isbddc,isnn,coarse_reuse; 7274 PetscInt ncoarse,nedcfield; 7275 PetscBool compute_vecs = PETSC_FALSE; 7276 PetscScalar *array; 7277 MatReuse coarse_mat_reuse; 7278 PetscBool restr, full_restr, have_void; 7279 PetscErrorCode ierr; 7280 7281 PetscFunctionBegin; 7282 /* Assign global numbering to coarse dofs */ 7283 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7284 PetscInt ocoarse_size; 7285 compute_vecs = PETSC_TRUE; 7286 ocoarse_size = pcbddc->coarse_size; 7287 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7288 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7289 /* see if we can avoid some work */ 7290 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7291 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7292 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7293 PC pc; 7294 PetscBool isbddc; 7295 7296 /* temporary workaround since PCBDDC does not have a reset method so far */ 7297 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 7298 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 7299 if (isbddc) { 7300 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 7301 } else { 7302 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7303 } 7304 coarse_reuse = PETSC_FALSE; 7305 } else { /* we can safely reuse already computed coarse matrix */ 7306 coarse_reuse = PETSC_TRUE; 7307 } 7308 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7309 coarse_reuse = PETSC_FALSE; 7310 } 7311 /* reset any subassembling information */ 7312 if (!coarse_reuse || pcbddc->recompute_topography) { 7313 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7314 } 7315 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7316 coarse_reuse = PETSC_TRUE; 7317 } 7318 /* assemble coarse matrix */ 7319 if (coarse_reuse && pcbddc->coarse_ksp) { 7320 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7321 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7322 coarse_mat_reuse = MAT_REUSE_MATRIX; 7323 } else { 7324 coarse_mat = NULL; 7325 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7326 } 7327 7328 /* creates temporary l2gmap and IS for coarse indexes */ 7329 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7330 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7331 7332 /* creates temporary MATIS object for coarse matrix */ 7333 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7334 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7335 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7336 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7337 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7338 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7339 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7340 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7341 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7342 7343 /* count "active" (i.e. with positive local size) and "void" processes */ 7344 im_active = !!(pcis->n); 7345 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7346 7347 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7348 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7349 /* full_restr : just use the receivers from the subassembling pattern */ 7350 coarse_mat_is = NULL; 7351 multilevel_allowed = PETSC_FALSE; 7352 multilevel_requested = PETSC_FALSE; 7353 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7354 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7355 if (multilevel_requested) { 7356 ncoarse = active_procs/pcbddc->coarsening_ratio; 7357 restr = PETSC_FALSE; 7358 full_restr = PETSC_FALSE; 7359 } else { 7360 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7361 restr = PETSC_TRUE; 7362 full_restr = PETSC_TRUE; 7363 } 7364 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7365 ncoarse = PetscMax(1,ncoarse); 7366 if (!pcbddc->coarse_subassembling) { 7367 if (pcbddc->coarsening_ratio > 1) { 7368 if (multilevel_requested) { 7369 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7370 } else { 7371 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7372 } 7373 } else { 7374 PetscMPIInt size,rank; 7375 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 7376 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7377 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 7378 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7379 } 7380 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7381 PetscInt psum; 7382 PetscMPIInt size; 7383 if (pcbddc->coarse_ksp) psum = 1; 7384 else psum = 0; 7385 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7386 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 7387 if (ncoarse < size) have_void = PETSC_TRUE; 7388 } 7389 /* determine if we can go multilevel */ 7390 if (multilevel_requested) { 7391 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7392 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7393 } 7394 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7395 7396 /* dump subassembling pattern */ 7397 if (pcbddc->dbg_flag && multilevel_allowed) { 7398 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7399 } 7400 7401 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7402 nedcfield = -1; 7403 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7404 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7405 const PetscInt *idxs; 7406 ISLocalToGlobalMapping tmap; 7407 7408 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7409 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7410 /* allocate space for temporary storage */ 7411 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7412 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7413 /* allocate for IS array */ 7414 nisdofs = pcbddc->n_ISForDofsLocal; 7415 if (pcbddc->nedclocal) { 7416 if (pcbddc->nedfield > -1) { 7417 nedcfield = pcbddc->nedfield; 7418 } else { 7419 nedcfield = 0; 7420 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7421 nisdofs = 1; 7422 } 7423 } 7424 nisneu = !!pcbddc->NeumannBoundariesLocal; 7425 nisvert = 0; /* nisvert is not used */ 7426 nis = nisdofs + nisneu + nisvert; 7427 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7428 /* dofs splitting */ 7429 for (i=0;i<nisdofs;i++) { 7430 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7431 if (nedcfield != i) { 7432 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7433 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7434 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7435 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7436 } else { 7437 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7438 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7439 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7440 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7441 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7442 } 7443 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7444 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7445 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7446 } 7447 /* neumann boundaries */ 7448 if (pcbddc->NeumannBoundariesLocal) { 7449 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7450 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7451 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7452 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7453 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7454 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7455 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7456 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7457 } 7458 /* free memory */ 7459 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7460 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7461 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7462 } else { 7463 nis = 0; 7464 nisdofs = 0; 7465 nisneu = 0; 7466 nisvert = 0; 7467 isarray = NULL; 7468 } 7469 /* destroy no longer needed map */ 7470 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7471 7472 /* subassemble */ 7473 if (multilevel_allowed) { 7474 Vec vp[1]; 7475 PetscInt nvecs = 0; 7476 PetscBool reuse,reuser; 7477 7478 if (coarse_mat) reuse = PETSC_TRUE; 7479 else reuse = PETSC_FALSE; 7480 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7481 vp[0] = NULL; 7482 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7483 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7484 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7485 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7486 nvecs = 1; 7487 7488 if (pcbddc->divudotp) { 7489 Mat B,loc_divudotp; 7490 Vec v,p; 7491 IS dummy; 7492 PetscInt np; 7493 7494 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7495 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7496 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7497 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7498 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7499 ierr = VecSet(p,1.);CHKERRQ(ierr); 7500 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7501 ierr = VecDestroy(&p);CHKERRQ(ierr); 7502 ierr = MatDestroy(&B);CHKERRQ(ierr); 7503 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7504 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7505 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7506 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7507 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7508 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7509 ierr = VecDestroy(&v);CHKERRQ(ierr); 7510 } 7511 } 7512 if (reuser) { 7513 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7514 } else { 7515 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7516 } 7517 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7518 PetscScalar *arraym,*arrayv; 7519 PetscInt nl; 7520 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7521 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7522 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7523 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7524 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7525 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7526 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7527 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7528 } else { 7529 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7530 } 7531 } else { 7532 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7533 } 7534 if (coarse_mat_is || coarse_mat) { 7535 PetscMPIInt size; 7536 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7537 if (!multilevel_allowed) { 7538 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7539 } else { 7540 Mat A; 7541 7542 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7543 if (coarse_mat_is) { 7544 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7545 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7546 coarse_mat = coarse_mat_is; 7547 } 7548 /* be sure we don't have MatSeqDENSE as local mat */ 7549 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7550 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7551 } 7552 } 7553 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7554 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7555 7556 /* create local to global scatters for coarse problem */ 7557 if (compute_vecs) { 7558 PetscInt lrows; 7559 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7560 if (coarse_mat) { 7561 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7562 } else { 7563 lrows = 0; 7564 } 7565 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7566 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7567 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7568 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7569 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7570 } 7571 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7572 7573 /* set defaults for coarse KSP and PC */ 7574 if (multilevel_allowed) { 7575 coarse_ksp_type = KSPRICHARDSON; 7576 coarse_pc_type = PCBDDC; 7577 } else { 7578 coarse_ksp_type = KSPPREONLY; 7579 coarse_pc_type = PCREDUNDANT; 7580 } 7581 7582 /* print some info if requested */ 7583 if (pcbddc->dbg_flag) { 7584 if (!multilevel_allowed) { 7585 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7586 if (multilevel_requested) { 7587 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7588 } else if (pcbddc->max_levels) { 7589 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7590 } 7591 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7592 } 7593 } 7594 7595 /* communicate coarse discrete gradient */ 7596 coarseG = NULL; 7597 if (pcbddc->nedcG && multilevel_allowed) { 7598 MPI_Comm ccomm; 7599 if (coarse_mat) { 7600 ccomm = PetscObjectComm((PetscObject)coarse_mat); 7601 } else { 7602 ccomm = MPI_COMM_NULL; 7603 } 7604 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 7605 } 7606 7607 /* create the coarse KSP object only once with defaults */ 7608 if (coarse_mat) { 7609 PetscViewer dbg_viewer = NULL; 7610 if (pcbddc->dbg_flag) { 7611 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7612 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7613 } 7614 if (!pcbddc->coarse_ksp) { 7615 char prefix[256],str_level[16]; 7616 size_t len; 7617 7618 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7619 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7620 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7621 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7622 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7623 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7624 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7625 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7626 /* TODO is this logic correct? should check for coarse_mat type */ 7627 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7628 /* prefix */ 7629 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7630 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7631 if (!pcbddc->current_level) { 7632 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7633 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7634 } else { 7635 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7636 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7637 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7638 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7639 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7640 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7641 } 7642 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7643 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7644 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7645 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7646 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7647 /* allow user customization */ 7648 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7649 } 7650 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7651 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7652 if (nisdofs) { 7653 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7654 for (i=0;i<nisdofs;i++) { 7655 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7656 } 7657 } 7658 if (nisneu) { 7659 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7660 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7661 } 7662 if (nisvert) { 7663 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7664 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7665 } 7666 if (coarseG) { 7667 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 7668 } 7669 7670 /* get some info after set from options */ 7671 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7672 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7673 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7674 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 7675 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7676 isbddc = PETSC_FALSE; 7677 } 7678 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7679 if (isredundant) { 7680 KSP inner_ksp; 7681 PC inner_pc; 7682 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7683 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7684 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7685 } 7686 7687 /* parameters which miss an API */ 7688 if (isbddc) { 7689 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7690 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7691 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7692 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7693 if (pcbddc_coarse->benign_saddle_point) { 7694 Mat coarsedivudotp_is; 7695 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7696 IS row,col; 7697 const PetscInt *gidxs; 7698 PetscInt n,st,M,N; 7699 7700 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7701 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7702 st = st-n; 7703 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7704 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7705 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7706 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7707 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7708 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7709 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7710 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7711 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7712 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7713 ierr = ISDestroy(&row);CHKERRQ(ierr); 7714 ierr = ISDestroy(&col);CHKERRQ(ierr); 7715 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7716 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7717 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7718 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7719 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7720 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7721 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7722 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7723 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7724 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7725 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7726 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7727 } 7728 } 7729 7730 /* propagate symmetry info of coarse matrix */ 7731 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7732 if (pc->pmat->symmetric_set) { 7733 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7734 } 7735 if (pc->pmat->hermitian_set) { 7736 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7737 } 7738 if (pc->pmat->spd_set) { 7739 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7740 } 7741 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7742 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7743 } 7744 /* set operators */ 7745 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7746 if (pcbddc->dbg_flag) { 7747 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7748 } 7749 } 7750 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 7751 ierr = PetscFree(isarray);CHKERRQ(ierr); 7752 #if 0 7753 { 7754 PetscViewer viewer; 7755 char filename[256]; 7756 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7757 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7758 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7759 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7760 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7761 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7762 } 7763 #endif 7764 7765 if (pcbddc->coarse_ksp) { 7766 Vec crhs,csol; 7767 7768 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7769 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7770 if (!csol) { 7771 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7772 } 7773 if (!crhs) { 7774 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7775 } 7776 } 7777 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7778 7779 /* compute null space for coarse solver if the benign trick has been requested */ 7780 if (pcbddc->benign_null) { 7781 7782 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7783 for (i=0;i<pcbddc->benign_n;i++) { 7784 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7785 } 7786 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7787 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7788 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7789 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7790 if (coarse_mat) { 7791 Vec nullv; 7792 PetscScalar *array,*array2; 7793 PetscInt nl; 7794 7795 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7796 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7797 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7798 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7799 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7800 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7801 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7802 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7803 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7804 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7805 } 7806 } 7807 7808 if (pcbddc->coarse_ksp) { 7809 PetscBool ispreonly; 7810 7811 if (CoarseNullSpace) { 7812 PetscBool isnull; 7813 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7814 if (isnull) { 7815 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7816 } 7817 /* TODO: add local nullspaces (if any) */ 7818 } 7819 /* setup coarse ksp */ 7820 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7821 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7822 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7823 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7824 KSP check_ksp; 7825 KSPType check_ksp_type; 7826 PC check_pc; 7827 Vec check_vec,coarse_vec; 7828 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7829 PetscInt its; 7830 PetscBool compute_eigs; 7831 PetscReal *eigs_r,*eigs_c; 7832 PetscInt neigs; 7833 const char *prefix; 7834 7835 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7836 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7837 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7838 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7839 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7840 /* prevent from setup unneeded object */ 7841 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7842 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7843 if (ispreonly) { 7844 check_ksp_type = KSPPREONLY; 7845 compute_eigs = PETSC_FALSE; 7846 } else { 7847 check_ksp_type = KSPGMRES; 7848 compute_eigs = PETSC_TRUE; 7849 } 7850 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7851 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7852 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7853 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7854 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7855 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7856 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7857 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7858 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7859 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7860 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7861 /* create random vec */ 7862 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7863 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7864 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7865 /* solve coarse problem */ 7866 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7867 /* set eigenvalue estimation if preonly has not been requested */ 7868 if (compute_eigs) { 7869 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7870 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7871 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7872 if (neigs) { 7873 lambda_max = eigs_r[neigs-1]; 7874 lambda_min = eigs_r[0]; 7875 if (pcbddc->use_coarse_estimates) { 7876 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7877 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7878 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7879 } 7880 } 7881 } 7882 } 7883 7884 /* check coarse problem residual error */ 7885 if (pcbddc->dbg_flag) { 7886 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7887 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7888 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7889 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7890 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7891 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7892 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7893 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7894 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7895 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7896 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7897 if (CoarseNullSpace) { 7898 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7899 } 7900 if (compute_eigs) { 7901 PetscReal lambda_max_s,lambda_min_s; 7902 KSPConvergedReason reason; 7903 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7904 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7905 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 7906 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7907 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7908 for (i=0;i<neigs;i++) { 7909 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7910 } 7911 } 7912 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7913 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7914 } 7915 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7916 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7917 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7918 if (compute_eigs) { 7919 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7920 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7921 } 7922 } 7923 } 7924 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7925 /* print additional info */ 7926 if (pcbddc->dbg_flag) { 7927 /* waits until all processes reaches this point */ 7928 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7929 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7930 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7931 } 7932 7933 /* free memory */ 7934 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7935 PetscFunctionReturn(0); 7936 } 7937 7938 #undef __FUNCT__ 7939 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 7940 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7941 { 7942 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7943 PC_IS* pcis = (PC_IS*)pc->data; 7944 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7945 IS subset,subset_mult,subset_n; 7946 PetscInt local_size,coarse_size=0; 7947 PetscInt *local_primal_indices=NULL; 7948 const PetscInt *t_local_primal_indices; 7949 PetscErrorCode ierr; 7950 7951 PetscFunctionBegin; 7952 /* Compute global number of coarse dofs */ 7953 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7954 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7955 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7956 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7957 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7958 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7959 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7960 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7961 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7962 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7963 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7964 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7965 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7966 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7967 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7968 7969 /* check numbering */ 7970 if (pcbddc->dbg_flag) { 7971 PetscScalar coarsesum,*array,*array2; 7972 PetscInt i; 7973 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7974 7975 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7976 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7977 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7978 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7979 /* counter */ 7980 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7981 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7982 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7983 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7984 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7985 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7986 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7987 for (i=0;i<pcbddc->local_primal_size;i++) { 7988 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7989 } 7990 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7991 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7992 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7993 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7994 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7995 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7996 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7997 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7998 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7999 for (i=0;i<pcis->n;i++) { 8000 if (array[i] != 0.0 && array[i] != array2[i]) { 8001 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8002 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8003 set_error = PETSC_TRUE; 8004 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8005 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8006 } 8007 } 8008 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8009 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8010 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8011 for (i=0;i<pcis->n;i++) { 8012 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8013 } 8014 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8015 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8016 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8017 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8018 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8019 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8020 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8021 PetscInt *gidxs; 8022 8023 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8024 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8025 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8026 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8027 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8028 for (i=0;i<pcbddc->local_primal_size;i++) { 8029 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8030 } 8031 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8032 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8033 } 8034 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8035 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8036 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8037 } 8038 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 8039 /* get back data */ 8040 *coarse_size_n = coarse_size; 8041 *local_primal_indices_n = local_primal_indices; 8042 PetscFunctionReturn(0); 8043 } 8044 8045 #undef __FUNCT__ 8046 #define __FUNCT__ "PCBDDCGlobalToLocal" 8047 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8048 { 8049 IS localis_t; 8050 PetscInt i,lsize,*idxs,n; 8051 PetscScalar *vals; 8052 PetscErrorCode ierr; 8053 8054 PetscFunctionBegin; 8055 /* get indices in local ordering exploiting local to global map */ 8056 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8057 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8058 for (i=0;i<lsize;i++) vals[i] = 1.0; 8059 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8060 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8061 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8062 if (idxs) { /* multilevel guard */ 8063 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8064 } 8065 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8066 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8067 ierr = PetscFree(vals);CHKERRQ(ierr); 8068 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8069 /* now compute set in local ordering */ 8070 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8071 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8072 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8073 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8074 for (i=0,lsize=0;i<n;i++) { 8075 if (PetscRealPart(vals[i]) > 0.5) { 8076 lsize++; 8077 } 8078 } 8079 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8080 for (i=0,lsize=0;i<n;i++) { 8081 if (PetscRealPart(vals[i]) > 0.5) { 8082 idxs[lsize++] = i; 8083 } 8084 } 8085 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8086 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8087 *localis = localis_t; 8088 PetscFunctionReturn(0); 8089 } 8090 8091 #undef __FUNCT__ 8092 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 8093 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8094 { 8095 PC_IS *pcis=(PC_IS*)pc->data; 8096 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8097 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8098 Mat S_j; 8099 PetscInt *used_xadj,*used_adjncy; 8100 PetscBool free_used_adj; 8101 PetscErrorCode ierr; 8102 8103 PetscFunctionBegin; 8104 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8105 free_used_adj = PETSC_FALSE; 8106 if (pcbddc->sub_schurs_layers == -1) { 8107 used_xadj = NULL; 8108 used_adjncy = NULL; 8109 } else { 8110 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8111 used_xadj = pcbddc->mat_graph->xadj; 8112 used_adjncy = pcbddc->mat_graph->adjncy; 8113 } else if (pcbddc->computed_rowadj) { 8114 used_xadj = pcbddc->mat_graph->xadj; 8115 used_adjncy = pcbddc->mat_graph->adjncy; 8116 } else { 8117 PetscBool flg_row=PETSC_FALSE; 8118 const PetscInt *xadj,*adjncy; 8119 PetscInt nvtxs; 8120 8121 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8122 if (flg_row) { 8123 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8124 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8125 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8126 free_used_adj = PETSC_TRUE; 8127 } else { 8128 pcbddc->sub_schurs_layers = -1; 8129 used_xadj = NULL; 8130 used_adjncy = NULL; 8131 } 8132 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8133 } 8134 } 8135 8136 /* setup sub_schurs data */ 8137 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8138 if (!sub_schurs->schur_explicit) { 8139 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8140 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8141 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8142 } else { 8143 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8144 PetscBool isseqaij,need_change = PETSC_FALSE; 8145 PetscInt benign_n; 8146 Mat change = NULL; 8147 Vec scaling = NULL; 8148 IS change_primal = NULL; 8149 8150 if (!pcbddc->use_vertices && reuse_solvers) { 8151 PetscInt n_vertices; 8152 8153 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8154 reuse_solvers = (PetscBool)!n_vertices; 8155 } 8156 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8157 if (!isseqaij) { 8158 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8159 if (matis->A == pcbddc->local_mat) { 8160 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8161 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8162 } else { 8163 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8164 } 8165 } 8166 if (!pcbddc->benign_change_explicit) { 8167 benign_n = pcbddc->benign_n; 8168 } else { 8169 benign_n = 0; 8170 } 8171 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8172 We need a global reduction to avoid possible deadlocks. 8173 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8174 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8175 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8176 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8177 need_change = (PetscBool)(!need_change); 8178 } 8179 /* If the user defines additional constraints, we import them here. 8180 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8181 if (need_change) { 8182 PC_IS *pcisf; 8183 PC_BDDC *pcbddcf; 8184 PC pcf; 8185 8186 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8187 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8188 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8189 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8190 /* hacks */ 8191 pcisf = (PC_IS*)pcf->data; 8192 pcisf->is_B_local = pcis->is_B_local; 8193 pcisf->vec1_N = pcis->vec1_N; 8194 pcisf->BtoNmap = pcis->BtoNmap; 8195 pcisf->n = pcis->n; 8196 pcisf->n_B = pcis->n_B; 8197 pcbddcf = (PC_BDDC*)pcf->data; 8198 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8199 pcbddcf->mat_graph = pcbddc->mat_graph; 8200 pcbddcf->use_faces = PETSC_TRUE; 8201 pcbddcf->use_change_of_basis = PETSC_TRUE; 8202 pcbddcf->use_change_on_faces = PETSC_TRUE; 8203 pcbddcf->use_qr_single = PETSC_TRUE; 8204 pcbddcf->fake_change = PETSC_TRUE; 8205 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8206 /* store information on primal vertices and change of basis (in local numbering) */ 8207 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8208 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8209 change = pcbddcf->ConstraintMatrix; 8210 pcbddcf->ConstraintMatrix = NULL; 8211 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8212 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8213 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8214 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8215 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8216 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8217 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8218 pcf->ops->destroy = NULL; 8219 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8220 } 8221 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8222 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8223 ierr = MatDestroy(&change);CHKERRQ(ierr); 8224 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8225 } 8226 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8227 8228 /* free adjacency */ 8229 if (free_used_adj) { 8230 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8231 } 8232 PetscFunctionReturn(0); 8233 } 8234 8235 #undef __FUNCT__ 8236 #define __FUNCT__ "PCBDDCInitSubSchurs" 8237 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8238 { 8239 PC_IS *pcis=(PC_IS*)pc->data; 8240 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8241 PCBDDCGraph graph; 8242 PetscErrorCode ierr; 8243 8244 PetscFunctionBegin; 8245 /* attach interface graph for determining subsets */ 8246 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8247 IS verticesIS,verticescomm; 8248 PetscInt vsize,*idxs; 8249 8250 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8251 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8252 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8253 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8254 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8255 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8256 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8257 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8258 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8259 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8260 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8261 } else { 8262 graph = pcbddc->mat_graph; 8263 } 8264 /* print some info */ 8265 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8266 IS vertices; 8267 PetscInt nv,nedges,nfaces; 8268 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8269 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8270 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8271 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8272 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8273 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8274 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8275 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8276 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8277 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8278 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8279 } 8280 8281 /* sub_schurs init */ 8282 if (!pcbddc->sub_schurs) { 8283 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8284 } 8285 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8286 8287 /* free graph struct */ 8288 if (pcbddc->sub_schurs_rebuild) { 8289 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8290 } 8291 PetscFunctionReturn(0); 8292 } 8293 8294 #undef __FUNCT__ 8295 #define __FUNCT__ "PCBDDCCheckOperator" 8296 PetscErrorCode PCBDDCCheckOperator(PC pc) 8297 { 8298 PC_IS *pcis=(PC_IS*)pc->data; 8299 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8300 PetscErrorCode ierr; 8301 8302 PetscFunctionBegin; 8303 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8304 IS zerodiag = NULL; 8305 Mat S_j,B0_B=NULL; 8306 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8307 PetscScalar *p0_check,*array,*array2; 8308 PetscReal norm; 8309 PetscInt i; 8310 8311 /* B0 and B0_B */ 8312 if (zerodiag) { 8313 IS dummy; 8314 8315 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8316 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8317 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8318 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8319 } 8320 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8321 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8322 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8323 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8324 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8325 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8326 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8327 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8328 /* S_j */ 8329 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8330 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8331 8332 /* mimic vector in \widetilde{W}_\Gamma */ 8333 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8334 /* continuous in primal space */ 8335 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8336 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8337 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8338 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8339 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8340 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8341 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8342 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8343 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8344 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8345 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8346 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8347 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8348 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8349 8350 /* assemble rhs for coarse problem */ 8351 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8352 /* local with Schur */ 8353 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8354 if (zerodiag) { 8355 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8356 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8357 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8358 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8359 } 8360 /* sum on primal nodes the local contributions */ 8361 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8362 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8363 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8364 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8365 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8366 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8367 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8368 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8369 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8370 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8371 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8372 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8373 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8374 /* scale primal nodes (BDDC sums contibutions) */ 8375 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8376 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8377 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8378 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8379 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8380 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8381 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8382 /* global: \widetilde{B0}_B w_\Gamma */ 8383 if (zerodiag) { 8384 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8385 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8386 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8387 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8388 } 8389 /* BDDC */ 8390 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8391 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8392 8393 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8394 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8395 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8396 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8397 for (i=0;i<pcbddc->benign_n;i++) { 8398 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8399 } 8400 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8401 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8402 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8403 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8404 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8405 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8406 } 8407 PetscFunctionReturn(0); 8408 } 8409 8410 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8411 #undef __FUNCT__ 8412 #define __FUNCT__ "MatMPIAIJRestrict" 8413 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8414 { 8415 Mat At; 8416 IS rows; 8417 PetscInt rst,ren; 8418 PetscErrorCode ierr; 8419 PetscLayout rmap; 8420 8421 PetscFunctionBegin; 8422 rst = ren = 0; 8423 if (ccomm != MPI_COMM_NULL) { 8424 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8425 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8426 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8427 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8428 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8429 } 8430 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8431 ierr = MatGetSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8432 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8433 8434 if (ccomm != MPI_COMM_NULL) { 8435 Mat_MPIAIJ *a,*b; 8436 IS from,to; 8437 Vec gvec; 8438 PetscInt lsize; 8439 8440 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8441 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8442 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8443 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8444 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8445 a = (Mat_MPIAIJ*)At->data; 8446 b = (Mat_MPIAIJ*)(*B)->data; 8447 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8448 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8449 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8450 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8451 b->A = a->A; 8452 b->B = a->B; 8453 8454 b->donotstash = a->donotstash; 8455 b->roworiented = a->roworiented; 8456 b->rowindices = 0; 8457 b->rowvalues = 0; 8458 b->getrowactive = PETSC_FALSE; 8459 8460 (*B)->rmap = rmap; 8461 (*B)->factortype = A->factortype; 8462 (*B)->assembled = PETSC_TRUE; 8463 (*B)->insertmode = NOT_SET_VALUES; 8464 (*B)->preallocated = PETSC_TRUE; 8465 8466 if (a->colmap) { 8467 #if defined(PETSC_USE_CTABLE) 8468 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8469 #else 8470 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8471 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8472 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8473 #endif 8474 } else b->colmap = 0; 8475 if (a->garray) { 8476 PetscInt len; 8477 len = a->B->cmap->n; 8478 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8479 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8480 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8481 } else b->garray = 0; 8482 8483 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8484 b->lvec = a->lvec; 8485 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8486 8487 /* cannot use VecScatterCopy */ 8488 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8489 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8490 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8491 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8492 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8493 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8494 ierr = ISDestroy(&from);CHKERRQ(ierr); 8495 ierr = ISDestroy(&to);CHKERRQ(ierr); 8496 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8497 } 8498 ierr = MatDestroy(&At);CHKERRQ(ierr); 8499 PetscFunctionReturn(0); 8500 } 8501