1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 8 9 /* if range is true, it returns B s.t. span{B} = range(A) 10 if range is false, it returns B s.t. range(B) _|_ range(A) */ 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatDenseOrthogonalRangeOrComplement" 13 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 14 { 15 #if !defined(PETSC_USE_COMPLEX) 16 PetscScalar *uwork,*data,*U, ds = 0.; 17 PetscReal *sing; 18 PetscBLASInt bM,bN,lwork,lierr,di = 1; 19 PetscInt ulw,i,nr,nc,n; 20 PetscErrorCode ierr; 21 22 PetscFunctionBegin; 23 #if defined(PETSC_MISSING_LAPACK_GESVD) 24 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 25 #else 26 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 27 if (!nr || !nc) PetscFunctionReturn(0); 28 29 /* workspace */ 30 if (!work) { 31 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 32 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 33 } else { 34 ulw = lw; 35 uwork = work; 36 } 37 n = PetscMin(nr,nc); 38 if (!rwork) { 39 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 40 } else { 41 sing = rwork; 42 } 43 44 /* SVD */ 45 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 46 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 47 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 49 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 50 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 51 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 52 ierr = PetscFPTrapPop();CHKERRQ(ierr); 53 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 54 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 55 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 56 if (!rwork) { 57 ierr = PetscFree(sing);CHKERRQ(ierr); 58 } 59 if (!work) { 60 ierr = PetscFree(uwork);CHKERRQ(ierr); 61 } 62 /* create B */ 63 if (!range) { 64 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 65 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 66 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 67 } else { 68 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 69 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 70 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 71 } 72 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 73 ierr = PetscFree(U);CHKERRQ(ierr); 74 #endif 75 #else /* PETSC_USE_COMPLEX */ 76 PetscFunctionBegin; 77 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 78 #endif 79 PetscFunctionReturn(0); 80 } 81 82 /* TODO REMOVE */ 83 #if defined(PRINT_GDET) 84 static int inc = 0; 85 static int lev = 0; 86 #endif 87 88 #undef __FUNCT__ 89 #define __FUNCT__ "PCBDDCComputeNedelecChangeEdge" 90 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 91 { 92 PetscErrorCode ierr; 93 Mat GE,GEd; 94 PetscInt rsize,csize,esize; 95 PetscScalar *ptr; 96 97 PetscFunctionBegin; 98 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 99 if (!esize) PetscFunctionReturn(0); 100 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 101 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 102 103 /* gradients */ 104 ptr = work + 5*esize; 105 ierr = MatGetSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 107 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 108 ierr = MatDestroy(&GE);CHKERRQ(ierr); 109 110 /* constants */ 111 ptr += rsize*csize; 112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 113 ierr = MatGetSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 114 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 115 ierr = MatDestroy(&GE);CHKERRQ(ierr); 116 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 117 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 118 119 if (corners) { 120 Mat GEc; 121 PetscScalar *vals,v; 122 123 ierr = MatGetSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 124 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 125 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 126 /* v = PetscAbsScalar(vals[0]) */; 127 v = 1.; 128 cvals[0] = vals[0]/v; 129 cvals[1] = vals[1]/v; 130 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 131 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 132 #if defined(PRINT_GDET) 133 { 134 PetscViewer viewer; 135 char filename[256]; 136 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 137 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 138 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 139 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 140 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 141 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 142 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 143 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 144 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 145 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 146 } 147 #endif 148 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 149 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 150 } 151 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "PCBDDCNedelecSupport" 157 PetscErrorCode PCBDDCNedelecSupport(PC pc) 158 { 159 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 160 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 161 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 162 Vec tvec; 163 PetscSF sfv; 164 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 165 MPI_Comm comm; 166 IS lned,primals,allprimals,nedfieldlocal; 167 IS *eedges,*extrows,*extcols,*alleedges; 168 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 169 PetscScalar *vals,*work; 170 PetscReal *rwork; 171 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 172 PetscInt ne,nv,Lv,order,n,field; 173 PetscInt n_neigh,*neigh,*n_shared,**shared; 174 PetscInt i,j,extmem,cum,maxsize,nee; 175 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 176 PetscInt *sfvleaves,*sfvroots; 177 PetscInt *corners,*cedges; 178 PetscInt *ecount,**eneighs,*vcount,**vneighs; 179 #if defined(PETSC_USE_DEBUG) 180 PetscInt *emarks; 181 #endif 182 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 183 PetscErrorCode ierr; 184 185 PetscFunctionBegin; 186 /* If the discrete gradient is defined for a subset of dofs and global is true, 187 it assumes G is given in global ordering for all the dofs. 188 Otherwise, the ordering is global for the Nedelec field */ 189 order = pcbddc->nedorder; 190 conforming = pcbddc->conforming; 191 field = pcbddc->nedfield; 192 global = pcbddc->nedglobal; 193 setprimal = PETSC_FALSE; 194 print = PETSC_FALSE; 195 singular = PETSC_FALSE; 196 197 /* Command line customization */ 198 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 199 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 200 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 201 ierr = PetscOptionsInt ("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 202 /* print debug info TODO: to be removed */ 203 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 204 ierr = PetscOptionsEnd();CHKERRQ(ierr); 205 206 /* Return if there are no edges in the decomposition and the problem is not singular */ 207 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 208 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 209 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 210 if (!singular) { 211 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 212 lrc[0] = PETSC_FALSE; 213 for (i=0;i<n;i++) { 214 if (PetscRealPart(vals[i]) > 2.) { 215 lrc[0] = PETSC_TRUE; 216 break; 217 } 218 } 219 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 220 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 221 if (!lrc[1]) PetscFunctionReturn(0); 222 } 223 224 /* Get Nedelec field */ 225 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 226 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 227 if (pcbddc->n_ISForDofsLocal && field >= 0) { 228 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 229 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 230 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 231 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 232 ne = n; 233 nedfieldlocal = NULL; 234 global = PETSC_TRUE; 235 } else if (field == PETSC_DECIDE) { 236 PetscInt rst,ren,*idx; 237 238 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 239 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 240 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 241 for (i=rst;i<ren;i++) { 242 PetscInt nc; 243 244 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 245 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 246 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 247 } 248 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 249 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 250 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 251 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 252 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 253 } else { 254 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 255 } 256 257 /* Sanity checks */ 258 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 259 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 260 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 261 262 /* Just set primal dofs and return */ 263 if (setprimal) { 264 IS enedfieldlocal; 265 PetscInt *eidxs; 266 267 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 268 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 269 if (nedfieldlocal) { 270 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 271 for (i=0,cum=0;i<ne;i++) { 272 if (PetscRealPart(vals[idxs[i]]) > 2.) { 273 eidxs[cum++] = idxs[i]; 274 } 275 } 276 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 277 } else { 278 for (i=0,cum=0;i<ne;i++) { 279 if (PetscRealPart(vals[i]) > 2.) { 280 eidxs[cum++] = i; 281 } 282 } 283 } 284 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 285 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 286 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 287 ierr = PetscFree(eidxs);CHKERRQ(ierr); 288 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 289 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 290 PetscFunctionReturn(0); 291 } 292 293 /* Compute some l2g maps */ 294 if (nedfieldlocal) { 295 IS is; 296 297 /* need to map from the local Nedelec field to local numbering */ 298 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 299 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 300 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 301 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 302 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 303 if (global) { 304 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 305 el2g = al2g; 306 } else { 307 IS gis; 308 309 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 310 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 311 ierr = ISDestroy(&gis);CHKERRQ(ierr); 312 } 313 ierr = ISDestroy(&is);CHKERRQ(ierr); 314 } else { 315 /* restore default */ 316 pcbddc->nedfield = -1; 317 /* one ref for the destruction of al2g, one for el2g */ 318 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 319 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 320 el2g = al2g; 321 fl2g = NULL; 322 } 323 324 /* Start communication to drop connections for interior edges (for cc analysis only) */ 325 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 326 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 327 if (nedfieldlocal) { 328 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 329 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 330 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 331 } else { 332 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 333 } 334 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 335 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 336 337 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 338 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 339 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 340 if (global) { 341 PetscInt rst; 342 343 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 344 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 345 if (matis->sf_rootdata[i] < 2) { 346 matis->sf_rootdata[cum++] = i + rst; 347 } 348 } 349 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 350 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 351 } else { 352 PetscInt *tbz; 353 354 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 355 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 356 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 357 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 358 for (i=0,cum=0;i<ne;i++) 359 if (matis->sf_leafdata[idxs[i]] == 1) 360 tbz[cum++] = i; 361 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 362 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 363 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 364 ierr = PetscFree(tbz);CHKERRQ(ierr); 365 } 366 } else { /* we need the entire G to infer the nullspace */ 367 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 368 G = pcbddc->discretegradient; 369 } 370 371 /* Extract subdomain relevant rows of G */ 372 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 373 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 374 ierr = MatGetSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 375 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 376 ierr = ISDestroy(&lned);CHKERRQ(ierr); 377 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 378 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 379 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 380 381 /* SF for nodal dofs communications */ 382 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 383 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 384 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 385 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 386 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 387 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 388 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 389 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 390 i = singular ? 2 : 1; 391 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 392 393 /* Destroy temporary G created in MATIS format and modified G */ 394 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 395 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 396 ierr = MatDestroy(&G);CHKERRQ(ierr); 397 398 if (print) { 399 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 400 ierr = MatView(lG,NULL);CHKERRQ(ierr); 401 } 402 403 /* Save lG for values insertion in change of basis */ 404 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 405 406 /* Analyze the edge-nodes connections (duplicate lG) */ 407 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 408 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 409 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 410 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 411 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 412 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 413 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 414 /* need to import the boundary specification to ensure the 415 proper detection of coarse edges' endpoints */ 416 if (pcbddc->DirichletBoundariesLocal) { 417 IS is; 418 419 if (fl2g) { 420 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 421 } else { 422 is = pcbddc->DirichletBoundariesLocal; 423 } 424 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 425 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 426 for (i=0;i<cum;i++) { 427 if (idxs[i] >= 0) { 428 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 429 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 430 } 431 } 432 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 433 if (fl2g) { 434 ierr = ISDestroy(&is);CHKERRQ(ierr); 435 } 436 } 437 if (pcbddc->NeumannBoundariesLocal) { 438 IS is; 439 440 if (fl2g) { 441 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 442 } else { 443 is = pcbddc->NeumannBoundariesLocal; 444 } 445 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 446 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 447 for (i=0;i<cum;i++) { 448 if (idxs[i] >= 0) { 449 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 450 } 451 } 452 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 453 if (fl2g) { 454 ierr = ISDestroy(&is);CHKERRQ(ierr); 455 } 456 } 457 458 /* Count neighs per dof */ 459 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 460 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 461 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 462 for (i=1,cum=0;i<n_neigh;i++) { 463 cum += n_shared[i]; 464 for (j=0;j<n_shared[i];j++) { 465 ecount[shared[i][j]]++; 466 } 467 } 468 if (ne) { 469 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 470 } 471 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 472 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 473 for (i=1;i<n_neigh;i++) { 474 for (j=0;j<n_shared[i];j++) { 475 PetscInt k = shared[i][j]; 476 eneighs[k][ecount[k]] = neigh[i]; 477 ecount[k]++; 478 } 479 } 480 for (i=0;i<ne;i++) { 481 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 482 } 483 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 484 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 485 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 486 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 487 for (i=1,cum=0;i<n_neigh;i++) { 488 cum += n_shared[i]; 489 for (j=0;j<n_shared[i];j++) { 490 vcount[shared[i][j]]++; 491 } 492 } 493 if (nv) { 494 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 495 } 496 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 497 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 498 for (i=1;i<n_neigh;i++) { 499 for (j=0;j<n_shared[i];j++) { 500 PetscInt k = shared[i][j]; 501 vneighs[k][vcount[k]] = neigh[i]; 502 vcount[k]++; 503 } 504 } 505 for (i=0;i<nv;i++) { 506 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 507 } 508 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 509 510 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 511 for proper detection of coarse edges' endpoints */ 512 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 513 for (i=0;i<ne;i++) { 514 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 515 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 516 } 517 } 518 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 519 if (!conforming) { 520 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 521 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 522 } 523 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 524 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 525 cum = 0; 526 for (i=0;i<ne;i++) { 527 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 528 if (!PetscBTLookup(btee,i)) { 529 marks[cum++] = i; 530 continue; 531 } 532 /* set badly connected edge dofs as primal */ 533 if (!conforming) { 534 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 535 marks[cum++] = i; 536 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 537 for (j=ii[i];j<ii[i+1];j++) { 538 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 539 } 540 } else { 541 /* every edge dofs should be connected trough a certain number of nodal dofs 542 to other edge dofs belonging to coarse edges 543 - at most 2 endpoints 544 - order-1 interior nodal dofs 545 - no undefined nodal dofs (nconn < order) 546 */ 547 PetscInt ends = 0,ints = 0, undef = 0; 548 for (j=ii[i];j<ii[i+1];j++) { 549 PetscInt v = jj[j],k; 550 PetscInt nconn = iit[v+1]-iit[v]; 551 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 552 if (nconn > order) ends++; 553 else if (nconn == order) ints++; 554 else undef++; 555 } 556 if (undef || ends > 2 || ints != order -1) { 557 marks[cum++] = i; 558 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 559 for (j=ii[i];j<ii[i+1];j++) { 560 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 561 } 562 } 563 } 564 } 565 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 566 if (!order && ii[i+1] != ii[i]) { 567 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 568 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 569 } 570 } 571 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 572 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 573 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 574 if (!conforming) { 575 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 576 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 577 } 578 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 579 580 /* identify splitpoints and corner candidates */ 581 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 582 if (print) { 583 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 584 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 585 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 586 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 587 } 588 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 589 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 590 for (i=0;i<nv;i++) { 591 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 592 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 593 if (!order) { /* variable order */ 594 PetscReal vorder = 0.; 595 596 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 597 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 598 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 599 ord = 1; 600 } 601 #if defined(PETSC_USE_DEBUG) 602 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 603 #endif 604 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 605 if (PetscBTLookup(btbd,jj[j])) { 606 bdir = PETSC_TRUE; 607 break; 608 } 609 if (vc != ecount[jj[j]]) { 610 sneighs = PETSC_FALSE; 611 } else { 612 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 613 for (k=0;k<vc;k++) { 614 if (vn[k] != en[k]) { 615 sneighs = PETSC_FALSE; 616 break; 617 } 618 } 619 } 620 } 621 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 622 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 623 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 624 } else if (test == ord) { 625 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 626 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 627 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 628 } else { 629 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 630 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 631 } 632 } 633 } 634 ierr = PetscFree(ecount);CHKERRQ(ierr); 635 ierr = PetscFree(vcount);CHKERRQ(ierr); 636 if (ne) { 637 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 638 } 639 if (nv) { 640 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 641 } 642 ierr = PetscFree(eneighs);CHKERRQ(ierr); 643 ierr = PetscFree(vneighs);CHKERRQ(ierr); 644 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 645 646 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 647 if (order != 1) { 648 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 649 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 650 for (i=0;i<nv;i++) { 651 if (PetscBTLookup(btvcand,i)) { 652 PetscBool found = PETSC_FALSE; 653 for (j=ii[i];j<ii[i+1] && !found;j++) { 654 PetscInt k,e = jj[j]; 655 if (PetscBTLookup(bte,e)) continue; 656 for (k=iit[e];k<iit[e+1];k++) { 657 PetscInt v = jjt[k]; 658 if (v != i && PetscBTLookup(btvcand,v)) { 659 found = PETSC_TRUE; 660 break; 661 } 662 } 663 } 664 if (!found) { 665 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 666 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 667 } else { 668 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 669 } 670 } 671 } 672 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 673 } 674 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 675 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 676 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 677 678 /* Get the local G^T explicitly */ 679 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 680 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 681 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 682 683 /* Mark interior nodal dofs */ 684 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 685 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 686 for (i=1;i<n_neigh;i++) { 687 for (j=0;j<n_shared[i];j++) { 688 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 689 } 690 } 691 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 692 693 /* communicate corners and splitpoints */ 694 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 695 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 696 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 697 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 698 699 if (print) { 700 IS tbz; 701 702 cum = 0; 703 for (i=0;i<nv;i++) 704 if (sfvleaves[i]) 705 vmarks[cum++] = i; 706 707 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 708 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 709 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 710 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 711 } 712 713 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 714 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 715 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 716 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 717 718 /* Zero rows of lGt corresponding to identified corners 719 and interior nodal dofs */ 720 cum = 0; 721 for (i=0;i<nv;i++) { 722 if (sfvleaves[i]) { 723 vmarks[cum++] = i; 724 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 725 } 726 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 727 } 728 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 729 if (print) { 730 IS tbz; 731 732 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 733 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 734 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 735 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 736 } 737 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 738 ierr = PetscFree(vmarks);CHKERRQ(ierr); 739 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 740 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 741 742 /* Recompute G */ 743 ierr = MatDestroy(&lG);CHKERRQ(ierr); 744 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 745 if (print) { 746 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 747 ierr = MatView(lG,NULL);CHKERRQ(ierr); 748 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 749 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 750 } 751 752 /* Get primal dofs (if any) */ 753 cum = 0; 754 for (i=0;i<ne;i++) { 755 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 756 } 757 if (fl2g) { 758 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 759 } 760 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 761 if (print) { 762 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 763 ierr = ISView(primals,NULL);CHKERRQ(ierr); 764 } 765 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 766 /* TODO: what if the user passed in some of them ? */ 767 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 768 ierr = ISDestroy(&primals);CHKERRQ(ierr); 769 770 /* Compute edge connectivity */ 771 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 772 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 773 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 774 if (fl2g) { 775 PetscBT btf; 776 PetscInt *iia,*jja,*iiu,*jju; 777 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 778 779 /* create CSR for all local dofs */ 780 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 781 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 782 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 783 iiu = pcbddc->mat_graph->xadj; 784 jju = pcbddc->mat_graph->adjncy; 785 } else if (pcbddc->use_local_adj) { 786 rest = PETSC_TRUE; 787 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 788 } else { 789 free = PETSC_TRUE; 790 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 791 iiu[0] = 0; 792 for (i=0;i<n;i++) { 793 iiu[i+1] = i+1; 794 jju[i] = -1; 795 } 796 } 797 798 /* import sizes of CSR */ 799 iia[0] = 0; 800 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 801 802 /* overwrite entries corresponding to the Nedelec field */ 803 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 804 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 805 for (i=0;i<ne;i++) { 806 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 807 iia[idxs[i]+1] = ii[i+1]-ii[i]; 808 } 809 810 /* iia in CSR */ 811 for (i=0;i<n;i++) iia[i+1] += iia[i]; 812 813 /* jja in CSR */ 814 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 815 for (i=0;i<n;i++) 816 if (!PetscBTLookup(btf,i)) 817 for (j=0;j<iiu[i+1]-iiu[i];j++) 818 jja[iia[i]+j] = jju[iiu[i]+j]; 819 820 /* map edge dofs connectivity */ 821 if (jj) { 822 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 823 for (i=0;i<ne;i++) { 824 PetscInt e = idxs[i]; 825 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 826 } 827 } 828 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 829 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 830 if (rest) { 831 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 832 } 833 if (free) { 834 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 835 } 836 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 837 } else { 838 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 839 } 840 841 /* Analyze interface for edge dofs */ 842 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 843 pcbddc->mat_graph->twodim = PETSC_FALSE; 844 845 /* Get coarse edges in the edge space */ 846 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 847 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 848 849 if (fl2g) { 850 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 851 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 852 for (i=0;i<nee;i++) { 853 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 854 } 855 } else { 856 eedges = alleedges; 857 primals = allprimals; 858 } 859 860 /* Mark fine edge dofs with their coarse edge id */ 861 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 862 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 863 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 864 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 865 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 866 if (print) { 867 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 868 ierr = ISView(primals,NULL);CHKERRQ(ierr); 869 } 870 871 maxsize = 0; 872 for (i=0;i<nee;i++) { 873 PetscInt size,mark = i+1; 874 875 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 876 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 877 for (j=0;j<size;j++) marks[idxs[j]] = mark; 878 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 879 maxsize = PetscMax(maxsize,size); 880 } 881 882 /* Find coarse edge endpoints */ 883 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 884 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 885 for (i=0;i<nee;i++) { 886 PetscInt mark = i+1,size; 887 888 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 889 if (!size && nedfieldlocal) continue; 890 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 891 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 892 if (print) { 893 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 894 ISView(eedges[i],NULL); 895 } 896 for (j=0;j<size;j++) { 897 PetscInt k, ee = idxs[j]; 898 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 899 for (k=ii[ee];k<ii[ee+1];k++) { 900 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 901 if (PetscBTLookup(btv,jj[k])) { 902 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 903 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 904 PetscInt k2; 905 PetscBool corner = PETSC_FALSE; 906 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 907 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 908 /* it's a corner if either is connected with an edge dof belonging to a different cc or 909 if the edge dof lie on the natural part of the boundary */ 910 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 911 corner = PETSC_TRUE; 912 break; 913 } 914 } 915 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 916 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 917 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 918 } else { 919 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 920 } 921 } 922 } 923 } 924 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 925 } 926 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 927 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 928 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 929 930 /* Reset marked primal dofs */ 931 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 932 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 933 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 934 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 935 936 /* Now use the initial lG */ 937 ierr = MatDestroy(&lG);CHKERRQ(ierr); 938 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 939 lG = lGinit; 940 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 941 942 /* Compute extended cols indices */ 943 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 944 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 945 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 946 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 947 i *= maxsize; 948 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 949 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 950 eerr = PETSC_FALSE; 951 for (i=0;i<nee;i++) { 952 PetscInt size,found = 0; 953 954 cum = 0; 955 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 956 if (!size && nedfieldlocal) continue; 957 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 958 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 959 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 960 for (j=0;j<size;j++) { 961 PetscInt k,ee = idxs[j]; 962 for (k=ii[ee];k<ii[ee+1];k++) { 963 PetscInt vv = jj[k]; 964 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 965 else if (!PetscBTLookupSet(btvc,vv)) found++; 966 } 967 } 968 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 969 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 970 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 971 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 972 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 973 /* it may happen that endpoints are not defined at this point 974 if it is the case, mark this edge for a second pass */ 975 if (cum != size -1 || found != 2) { 976 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 977 if (print) { 978 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 979 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 980 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 981 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 982 } 983 eerr = PETSC_TRUE; 984 } 985 } 986 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 987 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 988 if (done) { 989 PetscInt *newprimals; 990 991 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 992 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 993 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 994 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 995 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 996 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 997 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 998 for (i=0;i<nee;i++) { 999 PetscBool has_candidates = PETSC_FALSE; 1000 if (PetscBTLookup(bter,i)) { 1001 PetscInt size,mark = i+1; 1002 1003 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1004 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1005 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1006 for (j=0;j<size;j++) { 1007 PetscInt k,ee = idxs[j]; 1008 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 1009 for (k=ii[ee];k<ii[ee+1];k++) { 1010 /* set all candidates located on the edge as corners */ 1011 if (PetscBTLookup(btvcand,jj[k])) { 1012 PetscInt k2,vv = jj[k]; 1013 has_candidates = PETSC_TRUE; 1014 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 1015 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 1016 /* set all edge dofs connected to candidate as primals */ 1017 for (k2=iit[vv];k2<iit[vv+1];k2++) { 1018 if (marks[jjt[k2]] == mark) { 1019 PetscInt k3,ee2 = jjt[k2]; 1020 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 1021 newprimals[cum++] = ee2; 1022 /* finally set the new corners */ 1023 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 1024 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1025 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1026 } 1027 } 1028 } 1029 } else { 1030 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1031 } 1032 } 1033 } 1034 if (!has_candidates) { /* circular edge */ 1035 PetscInt k, ee = idxs[0],*tmarks; 1036 1037 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1038 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1039 for (k=ii[ee];k<ii[ee+1];k++) { 1040 PetscInt k2; 1041 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1042 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1043 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1044 } 1045 for (j=0;j<size;j++) { 1046 if (tmarks[idxs[j]] > 1) { 1047 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1048 newprimals[cum++] = idxs[j]; 1049 } 1050 } 1051 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1052 } 1053 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1054 } 1055 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1056 } 1057 ierr = PetscFree(extcols);CHKERRQ(ierr); 1058 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1059 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1060 if (fl2g) { 1061 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1062 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1063 for (i=0;i<nee;i++) { 1064 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1065 } 1066 ierr = PetscFree(eedges);CHKERRQ(ierr); 1067 } 1068 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1069 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1070 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1071 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1072 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1073 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1074 pcbddc->mat_graph->twodim = PETSC_FALSE; 1075 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1076 if (fl2g) { 1077 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1078 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1079 for (i=0;i<nee;i++) { 1080 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1081 } 1082 } else { 1083 eedges = alleedges; 1084 primals = allprimals; 1085 } 1086 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1087 1088 /* Mark again */ 1089 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1090 for (i=0;i<nee;i++) { 1091 PetscInt size,mark = i+1; 1092 1093 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1094 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1095 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1096 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1097 } 1098 if (print) { 1099 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1100 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1101 } 1102 1103 /* Recompute extended cols */ 1104 eerr = PETSC_FALSE; 1105 for (i=0;i<nee;i++) { 1106 PetscInt size; 1107 1108 cum = 0; 1109 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1110 if (!size && nedfieldlocal) continue; 1111 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1112 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1113 for (j=0;j<size;j++) { 1114 PetscInt k,ee = idxs[j]; 1115 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1116 } 1117 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1118 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1119 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1120 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1121 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1122 if (cum != size -1) { 1123 if (print) { 1124 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1125 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1126 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1127 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1128 } 1129 eerr = PETSC_TRUE; 1130 } 1131 } 1132 } 1133 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1134 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1135 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1136 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1137 /* an error should not occur at this point */ 1138 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1139 1140 /* Check the number of endpoints */ 1141 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1142 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1143 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1144 for (i=0;i<nee;i++) { 1145 PetscInt size, found = 0, gc[2]; 1146 1147 /* init with defaults */ 1148 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1149 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1150 if (!size && nedfieldlocal) continue; 1151 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1152 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1153 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1154 for (j=0;j<size;j++) { 1155 PetscInt k,ee = idxs[j]; 1156 for (k=ii[ee];k<ii[ee+1];k++) { 1157 PetscInt vv = jj[k]; 1158 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1159 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1160 corners[i*2+found++] = vv; 1161 } 1162 } 1163 } 1164 if (found != 2) { 1165 PetscInt e; 1166 if (fl2g) { 1167 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1168 } else { 1169 e = idxs[0]; 1170 } 1171 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1172 } 1173 1174 /* get primal dof index on this coarse edge */ 1175 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1176 if (gc[0] > gc[1]) { 1177 PetscInt swap = corners[2*i]; 1178 corners[2*i] = corners[2*i+1]; 1179 corners[2*i+1] = swap; 1180 } 1181 cedges[i] = idxs[size-1]; 1182 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1183 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1184 } 1185 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1186 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1187 1188 #if defined(PETSC_USE_DEBUG) 1189 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1190 not interfere with neighbouring coarse edges */ 1191 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1192 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1193 for (i=0;i<nv;i++) { 1194 PetscInt emax = 0,eemax = 0; 1195 1196 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1197 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1198 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1199 for (j=1;j<nee+1;j++) { 1200 if (emax < emarks[j]) { 1201 emax = emarks[j]; 1202 eemax = j; 1203 } 1204 } 1205 /* not relevant for edges */ 1206 if (!eemax) continue; 1207 1208 for (j=ii[i];j<ii[i+1];j++) { 1209 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1210 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1211 } 1212 } 1213 } 1214 ierr = PetscFree(emarks);CHKERRQ(ierr); 1215 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1216 #endif 1217 1218 /* Compute extended rows indices for edge blocks of the change of basis */ 1219 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1220 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1221 extmem *= maxsize; 1222 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1223 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1224 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1225 for (i=0;i<nv;i++) { 1226 PetscInt mark = 0,size,start; 1227 1228 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1229 for (j=ii[i];j<ii[i+1];j++) 1230 if (marks[jj[j]] && !mark) 1231 mark = marks[jj[j]]; 1232 1233 /* not relevant */ 1234 if (!mark) continue; 1235 1236 /* import extended row */ 1237 mark--; 1238 start = mark*extmem+extrowcum[mark]; 1239 size = ii[i+1]-ii[i]; 1240 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1241 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1242 extrowcum[mark] += size; 1243 } 1244 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1245 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1246 ierr = PetscFree(marks);CHKERRQ(ierr); 1247 1248 /* Compress extrows */ 1249 cum = 0; 1250 for (i=0;i<nee;i++) { 1251 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1252 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1253 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1254 cum = PetscMax(cum,size); 1255 } 1256 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1257 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1258 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1259 1260 /* Workspace for lapack inner calls and VecSetValues */ 1261 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1262 1263 /* Create change of basis matrix (preallocation can be improved) */ 1264 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1265 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1266 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1267 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1268 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1269 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1270 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1271 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1272 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1273 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1274 1275 /* Defaults to identity */ 1276 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1277 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1278 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1279 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1280 1281 /* Create discrete gradient for the coarser level if needed */ 1282 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1283 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1284 if (pcbddc->current_level < pcbddc->max_levels) { 1285 ISLocalToGlobalMapping cel2g,cvl2g; 1286 IS wis,gwis; 1287 PetscInt cnv,cne; 1288 1289 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1290 if (fl2g) { 1291 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1292 } else { 1293 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1294 pcbddc->nedclocal = wis; 1295 } 1296 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1297 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1298 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1299 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1300 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1301 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1302 1303 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1304 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1305 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1306 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1307 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1308 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1309 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1310 1311 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1312 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1313 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1314 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1315 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1316 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1317 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1318 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1319 } 1320 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1321 1322 #if defined(PRINT_GDET) 1323 inc = 0; 1324 lev = pcbddc->current_level; 1325 #endif 1326 1327 /* Insert values in the change of basis matrix */ 1328 for (i=0;i<nee;i++) { 1329 Mat Gins = NULL, GKins = NULL; 1330 IS cornersis = NULL; 1331 PetscScalar cvals[2]; 1332 1333 if (pcbddc->nedcG) { 1334 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1335 } 1336 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1337 if (Gins && GKins) { 1338 PetscScalar *data; 1339 const PetscInt *rows,*cols; 1340 PetscInt nrh,nch,nrc,ncc; 1341 1342 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1343 /* H1 */ 1344 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1345 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1346 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1347 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1348 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1349 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1350 /* complement */ 1351 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1352 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1353 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i); 1354 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc); 1355 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1356 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1357 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1358 1359 /* coarse discrete gradient */ 1360 if (pcbddc->nedcG) { 1361 PetscInt cols[2]; 1362 1363 cols[0] = 2*i; 1364 cols[1] = 2*i+1; 1365 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1366 } 1367 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1368 } 1369 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1370 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1371 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1372 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1373 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1374 } 1375 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1376 1377 /* Start assembling */ 1378 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1379 if (pcbddc->nedcG) { 1380 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1381 } 1382 1383 /* Free */ 1384 if (fl2g) { 1385 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1386 for (i=0;i<nee;i++) { 1387 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1388 } 1389 ierr = PetscFree(eedges);CHKERRQ(ierr); 1390 } 1391 1392 /* hack mat_graph with primal dofs on the coarse edges */ 1393 { 1394 PCBDDCGraph graph = pcbddc->mat_graph; 1395 PetscInt *oqueue = graph->queue; 1396 PetscInt *ocptr = graph->cptr; 1397 PetscInt ncc,*idxs; 1398 1399 /* find first primal edge */ 1400 if (pcbddc->nedclocal) { 1401 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1402 } else { 1403 if (fl2g) { 1404 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1405 } 1406 idxs = cedges; 1407 } 1408 cum = 0; 1409 while (cum < nee && cedges[cum] < 0) cum++; 1410 1411 /* adapt connected components */ 1412 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1413 graph->cptr[0] = 0; 1414 for (i=0,ncc=0;i<graph->ncc;i++) { 1415 PetscInt lc = ocptr[i+1]-ocptr[i]; 1416 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1417 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1418 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1419 ncc++; 1420 lc--; 1421 cum++; 1422 while (cum < nee && cedges[cum] < 0) cum++; 1423 } 1424 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1425 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1426 ncc++; 1427 } 1428 graph->ncc = ncc; 1429 if (pcbddc->nedclocal) { 1430 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1431 } 1432 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1433 } 1434 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1435 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1436 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1437 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1438 1439 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1440 ierr = PetscFree(extrow);CHKERRQ(ierr); 1441 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1442 ierr = PetscFree(corners);CHKERRQ(ierr); 1443 ierr = PetscFree(cedges);CHKERRQ(ierr); 1444 ierr = PetscFree(extrows);CHKERRQ(ierr); 1445 ierr = PetscFree(extcols);CHKERRQ(ierr); 1446 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1447 1448 /* Complete assembling */ 1449 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1450 if (pcbddc->nedcG) { 1451 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1452 #if 0 1453 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1454 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1455 #endif 1456 } 1457 1458 /* set change of basis */ 1459 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1460 ierr = MatDestroy(&T);CHKERRQ(ierr); 1461 1462 PetscFunctionReturn(0); 1463 } 1464 1465 /* the near-null space of BDDC carries information on quadrature weights, 1466 and these can be collinear -> so cheat with MatNullSpaceCreate 1467 and create a suitable set of basis vectors first */ 1468 #undef __FUNCT__ 1469 #define __FUNCT__ "PCBDDCNullSpaceCreate" 1470 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1471 { 1472 PetscErrorCode ierr; 1473 PetscInt i; 1474 1475 PetscFunctionBegin; 1476 for (i=0;i<nvecs;i++) { 1477 PetscInt first,last; 1478 1479 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1480 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1481 if (i>=first && i < last) { 1482 PetscScalar *data; 1483 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1484 if (!has_const) { 1485 data[i-first] = 1.; 1486 } else { 1487 data[2*i-first] = 1./PetscSqrtReal(2.); 1488 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1489 } 1490 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1491 } 1492 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1493 } 1494 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1495 for (i=0;i<nvecs;i++) { /* reset vectors */ 1496 PetscInt first,last; 1497 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1498 if (i>=first && i < last) { 1499 PetscScalar *data; 1500 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1501 if (!has_const) { 1502 data[i-first] = 0.; 1503 } else { 1504 data[2*i-first] = 0.; 1505 data[2*i-first+1] = 0.; 1506 } 1507 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1508 } 1509 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1510 } 1511 PetscFunctionReturn(0); 1512 } 1513 1514 #undef __FUNCT__ 1515 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 1516 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1517 { 1518 Mat loc_divudotp; 1519 Vec p,v,vins,quad_vec,*quad_vecs; 1520 ISLocalToGlobalMapping map; 1521 IS *faces,*edges; 1522 PetscScalar *vals; 1523 const PetscScalar *array; 1524 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1525 PetscMPIInt rank; 1526 PetscErrorCode ierr; 1527 1528 PetscFunctionBegin; 1529 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1530 if (graph->twodim) { 1531 lmaxneighs = 2; 1532 } else { 1533 lmaxneighs = 1; 1534 for (i=0;i<ne;i++) { 1535 const PetscInt *idxs; 1536 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1537 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1538 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1539 } 1540 lmaxneighs++; /* graph count does not include self */ 1541 } 1542 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1543 maxsize = 0; 1544 for (i=0;i<ne;i++) { 1545 PetscInt nn; 1546 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1547 maxsize = PetscMax(maxsize,nn); 1548 } 1549 for (i=0;i<nf;i++) { 1550 PetscInt nn; 1551 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1552 maxsize = PetscMax(maxsize,nn); 1553 } 1554 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1555 /* create vectors to hold quadrature weights */ 1556 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1557 if (!transpose) { 1558 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1559 } else { 1560 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1561 } 1562 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1563 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1564 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1565 for (i=0;i<maxneighs;i++) { 1566 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1567 } 1568 1569 /* compute local quad vec */ 1570 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1571 if (!transpose) { 1572 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1573 } else { 1574 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1575 } 1576 ierr = VecSet(p,1.);CHKERRQ(ierr); 1577 if (!transpose) { 1578 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1579 } else { 1580 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1581 } 1582 if (vl2l) { 1583 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1584 } else { 1585 vins = v; 1586 } 1587 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1588 ierr = VecDestroy(&p);CHKERRQ(ierr); 1589 1590 /* insert in global quadrature vecs */ 1591 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1592 for (i=0;i<nf;i++) { 1593 const PetscInt *idxs; 1594 PetscInt idx,nn,j; 1595 1596 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1597 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1598 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1599 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1600 idx = -(idx+1); 1601 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1602 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1603 } 1604 for (i=0;i<ne;i++) { 1605 const PetscInt *idxs; 1606 PetscInt idx,nn,j; 1607 1608 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1609 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1610 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1611 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1612 idx = -(idx+1); 1613 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1614 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1615 } 1616 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1617 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1618 if (vl2l) { 1619 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1620 } 1621 ierr = VecDestroy(&v);CHKERRQ(ierr); 1622 ierr = PetscFree(vals);CHKERRQ(ierr); 1623 1624 /* assemble near null space */ 1625 for (i=0;i<maxneighs;i++) { 1626 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1627 } 1628 for (i=0;i<maxneighs;i++) { 1629 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1630 } 1631 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1632 PetscFunctionReturn(0); 1633 } 1634 1635 1636 #undef __FUNCT__ 1637 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 1638 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1639 { 1640 PetscErrorCode ierr; 1641 Vec local,global; 1642 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1643 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1644 1645 PetscFunctionBegin; 1646 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1647 /* need to convert from global to local topology information and remove references to information in global ordering */ 1648 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1649 if (pcbddc->user_provided_isfordofs) { 1650 if (pcbddc->n_ISForDofs) { 1651 PetscInt i; 1652 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1653 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1654 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1655 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1656 } 1657 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1658 pcbddc->n_ISForDofs = 0; 1659 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1660 } 1661 } else { 1662 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1663 PetscInt i, n = matis->A->rmap->n; 1664 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1665 if (i > 1) { 1666 pcbddc->n_ISForDofsLocal = i; 1667 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1668 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1669 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1670 } 1671 } 1672 } 1673 } 1674 1675 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1676 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1677 } 1678 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1679 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1680 } 1681 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1682 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1683 } 1684 ierr = VecDestroy(&global);CHKERRQ(ierr); 1685 ierr = VecDestroy(&local);CHKERRQ(ierr); 1686 PetscFunctionReturn(0); 1687 } 1688 1689 #undef __FUNCT__ 1690 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 1691 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1692 { 1693 PC_IS *pcis = (PC_IS*)(pc->data); 1694 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1695 PetscErrorCode ierr; 1696 1697 PetscFunctionBegin; 1698 if (!pcbddc->benign_have_null) { 1699 PetscFunctionReturn(0); 1700 } 1701 if (pcbddc->ChangeOfBasisMatrix) { 1702 Vec swap; 1703 1704 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1705 swap = pcbddc->work_change; 1706 pcbddc->work_change = r; 1707 r = swap; 1708 } 1709 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1710 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1711 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1712 ierr = VecSet(z,0.);CHKERRQ(ierr); 1713 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1714 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1715 if (pcbddc->ChangeOfBasisMatrix) { 1716 pcbddc->work_change = r; 1717 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1718 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1719 } 1720 PetscFunctionReturn(0); 1721 } 1722 1723 #undef __FUNCT__ 1724 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 1725 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1726 { 1727 PCBDDCBenignMatMult_ctx ctx; 1728 PetscErrorCode ierr; 1729 PetscBool apply_right,apply_left,reset_x; 1730 1731 PetscFunctionBegin; 1732 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1733 if (transpose) { 1734 apply_right = ctx->apply_left; 1735 apply_left = ctx->apply_right; 1736 } else { 1737 apply_right = ctx->apply_right; 1738 apply_left = ctx->apply_left; 1739 } 1740 reset_x = PETSC_FALSE; 1741 if (apply_right) { 1742 const PetscScalar *ax; 1743 PetscInt nl,i; 1744 1745 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1746 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1747 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1748 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1749 for (i=0;i<ctx->benign_n;i++) { 1750 PetscScalar sum,val; 1751 const PetscInt *idxs; 1752 PetscInt nz,j; 1753 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1754 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1755 sum = 0.; 1756 if (ctx->apply_p0) { 1757 val = ctx->work[idxs[nz-1]]; 1758 for (j=0;j<nz-1;j++) { 1759 sum += ctx->work[idxs[j]]; 1760 ctx->work[idxs[j]] += val; 1761 } 1762 } else { 1763 for (j=0;j<nz-1;j++) { 1764 sum += ctx->work[idxs[j]]; 1765 } 1766 } 1767 ctx->work[idxs[nz-1]] -= sum; 1768 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1769 } 1770 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1771 reset_x = PETSC_TRUE; 1772 } 1773 if (transpose) { 1774 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1775 } else { 1776 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1777 } 1778 if (reset_x) { 1779 ierr = VecResetArray(x);CHKERRQ(ierr); 1780 } 1781 if (apply_left) { 1782 PetscScalar *ay; 1783 PetscInt i; 1784 1785 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1786 for (i=0;i<ctx->benign_n;i++) { 1787 PetscScalar sum,val; 1788 const PetscInt *idxs; 1789 PetscInt nz,j; 1790 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1791 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1792 val = -ay[idxs[nz-1]]; 1793 if (ctx->apply_p0) { 1794 sum = 0.; 1795 for (j=0;j<nz-1;j++) { 1796 sum += ay[idxs[j]]; 1797 ay[idxs[j]] += val; 1798 } 1799 ay[idxs[nz-1]] += sum; 1800 } else { 1801 for (j=0;j<nz-1;j++) { 1802 ay[idxs[j]] += val; 1803 } 1804 ay[idxs[nz-1]] = 0.; 1805 } 1806 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1807 } 1808 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1809 } 1810 PetscFunctionReturn(0); 1811 } 1812 1813 #undef __FUNCT__ 1814 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 1815 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1816 { 1817 PetscErrorCode ierr; 1818 1819 PetscFunctionBegin; 1820 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1821 PetscFunctionReturn(0); 1822 } 1823 1824 #undef __FUNCT__ 1825 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 1826 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1827 { 1828 PetscErrorCode ierr; 1829 1830 PetscFunctionBegin; 1831 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1832 PetscFunctionReturn(0); 1833 } 1834 1835 #undef __FUNCT__ 1836 #define __FUNCT__ "PCBDDCBenignShellMat" 1837 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1838 { 1839 PC_IS *pcis = (PC_IS*)pc->data; 1840 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1841 PCBDDCBenignMatMult_ctx ctx; 1842 PetscErrorCode ierr; 1843 1844 PetscFunctionBegin; 1845 if (!restore) { 1846 Mat A_IB,A_BI; 1847 PetscScalar *work; 1848 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1849 1850 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1851 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1852 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1853 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1854 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1855 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1856 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1857 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1858 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1859 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1860 ctx->apply_left = PETSC_TRUE; 1861 ctx->apply_right = PETSC_FALSE; 1862 ctx->apply_p0 = PETSC_FALSE; 1863 ctx->benign_n = pcbddc->benign_n; 1864 if (reuse) { 1865 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1866 ctx->free = PETSC_FALSE; 1867 } else { /* TODO: could be optimized for successive solves */ 1868 ISLocalToGlobalMapping N_to_D; 1869 PetscInt i; 1870 1871 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1872 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1873 for (i=0;i<pcbddc->benign_n;i++) { 1874 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1875 } 1876 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1877 ctx->free = PETSC_TRUE; 1878 } 1879 ctx->A = pcis->A_IB; 1880 ctx->work = work; 1881 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1882 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1883 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1884 pcis->A_IB = A_IB; 1885 1886 /* A_BI as A_IB^T */ 1887 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1888 pcbddc->benign_original_mat = pcis->A_BI; 1889 pcis->A_BI = A_BI; 1890 } else { 1891 if (!pcbddc->benign_original_mat) { 1892 PetscFunctionReturn(0); 1893 } 1894 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1895 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1896 pcis->A_IB = ctx->A; 1897 ctx->A = NULL; 1898 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1899 pcis->A_BI = pcbddc->benign_original_mat; 1900 pcbddc->benign_original_mat = NULL; 1901 if (ctx->free) { 1902 PetscInt i; 1903 for (i=0;i<ctx->benign_n;i++) { 1904 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1905 } 1906 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1907 } 1908 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1909 ierr = PetscFree(ctx);CHKERRQ(ierr); 1910 } 1911 PetscFunctionReturn(0); 1912 } 1913 1914 /* used just in bddc debug mode */ 1915 #undef __FUNCT__ 1916 #define __FUNCT__ "PCBDDCBenignProject" 1917 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1918 { 1919 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1920 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1921 Mat An; 1922 PetscErrorCode ierr; 1923 1924 PetscFunctionBegin; 1925 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1926 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1927 if (is1) { 1928 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1929 ierr = MatDestroy(&An);CHKERRQ(ierr); 1930 } else { 1931 *B = An; 1932 } 1933 PetscFunctionReturn(0); 1934 } 1935 1936 /* TODO: add reuse flag */ 1937 #undef __FUNCT__ 1938 #define __FUNCT__ "MatSeqAIJCompress" 1939 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1940 { 1941 Mat Bt; 1942 PetscScalar *a,*bdata; 1943 const PetscInt *ii,*ij; 1944 PetscInt m,n,i,nnz,*bii,*bij; 1945 PetscBool flg_row; 1946 PetscErrorCode ierr; 1947 1948 PetscFunctionBegin; 1949 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 1950 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1951 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 1952 nnz = n; 1953 for (i=0;i<ii[n];i++) { 1954 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 1955 } 1956 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 1957 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 1958 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 1959 nnz = 0; 1960 bii[0] = 0; 1961 for (i=0;i<n;i++) { 1962 PetscInt j; 1963 for (j=ii[i];j<ii[i+1];j++) { 1964 PetscScalar entry = a[j]; 1965 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 1966 bij[nnz] = ij[j]; 1967 bdata[nnz] = entry; 1968 nnz++; 1969 } 1970 } 1971 bii[i+1] = nnz; 1972 } 1973 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 1974 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 1975 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1976 { 1977 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 1978 b->free_a = PETSC_TRUE; 1979 b->free_ij = PETSC_TRUE; 1980 } 1981 *B = Bt; 1982 PetscFunctionReturn(0); 1983 } 1984 1985 #undef __FUNCT__ 1986 #define __FUNCT__ "MatDetectDisconnectedComponents" 1987 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 1988 { 1989 Mat B; 1990 IS is_dummy,*cc_n; 1991 ISLocalToGlobalMapping l2gmap_dummy; 1992 PCBDDCGraph graph; 1993 PetscInt i,n; 1994 PetscInt *xadj,*adjncy; 1995 PetscInt *xadj_filtered,*adjncy_filtered; 1996 PetscBool flg_row,isseqaij; 1997 PetscErrorCode ierr; 1998 1999 PetscFunctionBegin; 2000 if (!A->rmap->N || !A->cmap->N) { 2001 *ncc = 0; 2002 *cc = NULL; 2003 PetscFunctionReturn(0); 2004 } 2005 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2006 if (!isseqaij && filter) { 2007 PetscBool isseqdense; 2008 2009 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2010 if (!isseqdense) { 2011 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2012 } else { /* TODO: rectangular case and LDA */ 2013 PetscScalar *array; 2014 PetscReal chop=1.e-6; 2015 2016 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2017 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2018 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2019 for (i=0;i<n;i++) { 2020 PetscInt j; 2021 for (j=i+1;j<n;j++) { 2022 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2023 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2024 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2025 } 2026 } 2027 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2028 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2029 } 2030 } else { 2031 B = A; 2032 } 2033 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2034 2035 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2036 if (filter) { 2037 PetscScalar *data; 2038 PetscInt j,cum; 2039 2040 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2041 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2042 cum = 0; 2043 for (i=0;i<n;i++) { 2044 PetscInt t; 2045 2046 for (j=xadj[i];j<xadj[i+1];j++) { 2047 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2048 continue; 2049 } 2050 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2051 } 2052 t = xadj_filtered[i]; 2053 xadj_filtered[i] = cum; 2054 cum += t; 2055 } 2056 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2057 } else { 2058 xadj_filtered = NULL; 2059 adjncy_filtered = NULL; 2060 } 2061 2062 /* compute local connected components using PCBDDCGraph */ 2063 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2064 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2065 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2066 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2067 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2068 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2069 if (xadj_filtered) { 2070 graph->xadj = xadj_filtered; 2071 graph->adjncy = adjncy_filtered; 2072 } else { 2073 graph->xadj = xadj; 2074 graph->adjncy = adjncy; 2075 } 2076 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2077 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2078 /* partial clean up */ 2079 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2080 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2081 if (A != B) { 2082 ierr = MatDestroy(&B);CHKERRQ(ierr); 2083 } 2084 2085 /* get back data */ 2086 if (ncc) *ncc = graph->ncc; 2087 if (cc) { 2088 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2089 for (i=0;i<graph->ncc;i++) { 2090 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2091 } 2092 *cc = cc_n; 2093 } 2094 /* clean up graph */ 2095 graph->xadj = 0; 2096 graph->adjncy = 0; 2097 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2098 PetscFunctionReturn(0); 2099 } 2100 2101 #undef __FUNCT__ 2102 #define __FUNCT__ "PCBDDCBenignCheck" 2103 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2104 { 2105 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2106 PC_IS* pcis = (PC_IS*)(pc->data); 2107 IS dirIS = NULL; 2108 PetscInt i; 2109 PetscErrorCode ierr; 2110 2111 PetscFunctionBegin; 2112 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2113 if (zerodiag) { 2114 Mat A; 2115 Vec vec3_N; 2116 PetscScalar *vals; 2117 const PetscInt *idxs; 2118 PetscInt nz,*count; 2119 2120 /* p0 */ 2121 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2122 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2123 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2124 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2125 for (i=0;i<nz;i++) vals[i] = 1.; 2126 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2127 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2128 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2129 /* v_I */ 2130 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2131 for (i=0;i<nz;i++) vals[i] = 0.; 2132 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2133 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2134 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2135 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2136 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2137 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2138 if (dirIS) { 2139 PetscInt n; 2140 2141 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2142 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2143 for (i=0;i<n;i++) vals[i] = 0.; 2144 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2145 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2146 } 2147 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2148 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2149 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2150 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2151 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2152 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2153 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2154 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2155 ierr = PetscFree(vals);CHKERRQ(ierr); 2156 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2157 2158 /* there should not be any pressure dofs lying on the interface */ 2159 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2160 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2161 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2162 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2163 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2164 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2165 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2166 ierr = PetscFree(count);CHKERRQ(ierr); 2167 } 2168 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2169 2170 /* check PCBDDCBenignGetOrSetP0 */ 2171 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2172 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2173 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2174 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2175 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2176 for (i=0;i<pcbddc->benign_n;i++) { 2177 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2178 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 2179 } 2180 PetscFunctionReturn(0); 2181 } 2182 2183 #undef __FUNCT__ 2184 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 2185 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2186 { 2187 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2188 IS pressures,zerodiag,zerodiag_save,*zerodiag_subs; 2189 PetscInt nz,n; 2190 PetscInt *interior_dofs,n_interior_dofs,nneu; 2191 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2192 PetscErrorCode ierr; 2193 2194 PetscFunctionBegin; 2195 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2196 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2197 for (n=0;n<pcbddc->benign_n;n++) { 2198 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2199 } 2200 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2201 pcbddc->benign_n = 0; 2202 /* if a local info on dofs is present, assumes that the last field represents "pressures" 2203 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2204 Checks if all the pressure dofs in each subdomain have a zero diagonal 2205 If not, a change of basis on pressures is not needed 2206 since the local Schur complements are already SPD 2207 */ 2208 has_null_pressures = PETSC_TRUE; 2209 have_null = PETSC_TRUE; 2210 if (pcbddc->n_ISForDofsLocal) { 2211 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2212 2213 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2214 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2215 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2216 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2217 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2218 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2219 if (!sorted) { 2220 ierr = ISSort(pressures);CHKERRQ(ierr); 2221 } 2222 } else { 2223 pressures = NULL; 2224 } 2225 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2226 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2227 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2228 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2229 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2230 if (!sorted) { 2231 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2232 } 2233 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2234 zerodiag_save = zerodiag; 2235 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2236 if (!nz) { 2237 if (n) have_null = PETSC_FALSE; 2238 has_null_pressures = PETSC_FALSE; 2239 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2240 } 2241 recompute_zerodiag = PETSC_FALSE; 2242 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2243 zerodiag_subs = NULL; 2244 pcbddc->benign_n = 0; 2245 n_interior_dofs = 0; 2246 interior_dofs = NULL; 2247 nneu = 0; 2248 if (pcbddc->NeumannBoundariesLocal) { 2249 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2250 } 2251 checkb = (PetscBool)(!nneu || pcbddc->current_level); 2252 if (checkb) { /* need to compute interior nodes */ 2253 PetscInt n,i,j; 2254 PetscInt n_neigh,*neigh,*n_shared,**shared; 2255 PetscInt *iwork; 2256 2257 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2258 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2259 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2260 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2261 for (i=1;i<n_neigh;i++) 2262 for (j=0;j<n_shared[i];j++) 2263 iwork[shared[i][j]] += 1; 2264 for (i=0;i<n;i++) 2265 if (!iwork[i]) 2266 interior_dofs[n_interior_dofs++] = i; 2267 ierr = PetscFree(iwork);CHKERRQ(ierr); 2268 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2269 } 2270 if (has_null_pressures) { 2271 IS *subs; 2272 PetscInt nsubs,i,j,nl; 2273 const PetscInt *idxs; 2274 PetscScalar *array; 2275 Vec *work; 2276 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2277 2278 subs = pcbddc->local_subs; 2279 nsubs = pcbddc->n_local_subs; 2280 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2281 if (checkb) { 2282 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2283 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2284 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2285 /* work[0] = 1_p */ 2286 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2287 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2288 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2289 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2290 /* work[0] = 1_v */ 2291 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2292 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2293 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2294 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2295 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2296 } 2297 if (nsubs > 1) { 2298 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2299 for (i=0;i<nsubs;i++) { 2300 ISLocalToGlobalMapping l2g; 2301 IS t_zerodiag_subs; 2302 PetscInt nl; 2303 2304 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2305 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2306 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2307 if (nl) { 2308 PetscBool valid = PETSC_TRUE; 2309 2310 if (checkb) { 2311 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2312 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2313 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2314 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2315 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2316 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2317 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2318 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2319 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2320 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2321 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2322 for (j=0;j<n_interior_dofs;j++) { 2323 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2324 valid = PETSC_FALSE; 2325 break; 2326 } 2327 } 2328 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2329 } 2330 if (valid && nneu) { 2331 const PetscInt *idxs; 2332 PetscInt nzb; 2333 2334 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2335 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2336 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2337 if (nzb) valid = PETSC_FALSE; 2338 } 2339 if (valid && pressures) { 2340 IS t_pressure_subs; 2341 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2342 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2343 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2344 } 2345 if (valid) { 2346 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2347 pcbddc->benign_n++; 2348 } else { 2349 recompute_zerodiag = PETSC_TRUE; 2350 } 2351 } 2352 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2353 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2354 } 2355 } else { /* there's just one subdomain (or zero if they have not been detected */ 2356 PetscBool valid = PETSC_TRUE; 2357 2358 if (nneu) valid = PETSC_FALSE; 2359 if (valid && pressures) { 2360 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2361 } 2362 if (valid && checkb) { 2363 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2364 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2365 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2366 for (j=0;j<n_interior_dofs;j++) { 2367 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2368 valid = PETSC_FALSE; 2369 break; 2370 } 2371 } 2372 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2373 } 2374 if (valid) { 2375 pcbddc->benign_n = 1; 2376 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2377 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2378 zerodiag_subs[0] = zerodiag; 2379 } 2380 } 2381 if (checkb) { 2382 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2383 } 2384 } 2385 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2386 2387 if (!pcbddc->benign_n) { 2388 PetscInt n; 2389 2390 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2391 recompute_zerodiag = PETSC_FALSE; 2392 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2393 if (n) { 2394 has_null_pressures = PETSC_FALSE; 2395 have_null = PETSC_FALSE; 2396 } 2397 } 2398 2399 /* final check for null pressures */ 2400 if (zerodiag && pressures) { 2401 PetscInt nz,np; 2402 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2403 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2404 if (nz != np) have_null = PETSC_FALSE; 2405 } 2406 2407 if (recompute_zerodiag) { 2408 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2409 if (pcbddc->benign_n == 1) { 2410 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2411 zerodiag = zerodiag_subs[0]; 2412 } else { 2413 PetscInt i,nzn,*new_idxs; 2414 2415 nzn = 0; 2416 for (i=0;i<pcbddc->benign_n;i++) { 2417 PetscInt ns; 2418 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2419 nzn += ns; 2420 } 2421 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2422 nzn = 0; 2423 for (i=0;i<pcbddc->benign_n;i++) { 2424 PetscInt ns,*idxs; 2425 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2426 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2427 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2428 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2429 nzn += ns; 2430 } 2431 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2432 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2433 } 2434 have_null = PETSC_FALSE; 2435 } 2436 2437 /* Prepare matrix to compute no-net-flux */ 2438 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2439 Mat A,loc_divudotp; 2440 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2441 IS row,col,isused = NULL; 2442 PetscInt M,N,n,st,n_isused; 2443 2444 if (pressures) { 2445 isused = pressures; 2446 } else { 2447 isused = zerodiag_save; 2448 } 2449 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2450 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2451 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2452 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2453 n_isused = 0; 2454 if (isused) { 2455 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2456 } 2457 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2458 st = st-n_isused; 2459 if (n) { 2460 const PetscInt *gidxs; 2461 2462 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2463 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2464 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2465 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2466 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2467 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2468 } else { 2469 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2470 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2471 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2472 } 2473 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2474 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2475 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2476 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2477 ierr = ISDestroy(&row);CHKERRQ(ierr); 2478 ierr = ISDestroy(&col);CHKERRQ(ierr); 2479 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2480 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2481 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2482 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2483 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2484 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2485 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2486 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2487 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2488 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2489 } 2490 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2491 2492 /* change of basis and p0 dofs */ 2493 if (has_null_pressures) { 2494 IS zerodiagc; 2495 const PetscInt *idxs,*idxsc; 2496 PetscInt i,s,*nnz; 2497 2498 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2499 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2500 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2501 /* local change of basis for pressures */ 2502 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2503 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2504 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2505 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2506 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2507 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2508 for (i=0;i<pcbddc->benign_n;i++) { 2509 PetscInt nzs,j; 2510 2511 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2512 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2513 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2514 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2515 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2516 } 2517 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2518 ierr = PetscFree(nnz);CHKERRQ(ierr); 2519 /* set identity on velocities */ 2520 for (i=0;i<n-nz;i++) { 2521 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2522 } 2523 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2524 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2525 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2526 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2527 /* set change on pressures */ 2528 for (s=0;s<pcbddc->benign_n;s++) { 2529 PetscScalar *array; 2530 PetscInt nzs; 2531 2532 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2533 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2534 for (i=0;i<nzs-1;i++) { 2535 PetscScalar vals[2]; 2536 PetscInt cols[2]; 2537 2538 cols[0] = idxs[i]; 2539 cols[1] = idxs[nzs-1]; 2540 vals[0] = 1.; 2541 vals[1] = 1.; 2542 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2543 } 2544 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2545 for (i=0;i<nzs-1;i++) array[i] = -1.; 2546 array[nzs-1] = 1.; 2547 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2548 /* store local idxs for p0 */ 2549 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2550 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2551 ierr = PetscFree(array);CHKERRQ(ierr); 2552 } 2553 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2554 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2555 /* project if needed */ 2556 if (pcbddc->benign_change_explicit) { 2557 Mat M; 2558 2559 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2560 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2561 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2562 ierr = MatDestroy(&M);CHKERRQ(ierr); 2563 } 2564 /* store global idxs for p0 */ 2565 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2566 } 2567 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2568 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2569 2570 /* determines if the coarse solver will be singular or not */ 2571 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2572 /* determines if the problem has subdomains with 0 pressure block */ 2573 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2574 *zerodiaglocal = zerodiag; 2575 PetscFunctionReturn(0); 2576 } 2577 2578 #undef __FUNCT__ 2579 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 2580 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2581 { 2582 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2583 PetscScalar *array; 2584 PetscErrorCode ierr; 2585 2586 PetscFunctionBegin; 2587 if (!pcbddc->benign_sf) { 2588 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2589 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2590 } 2591 if (get) { 2592 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2593 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2594 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2595 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2596 } else { 2597 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2598 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2599 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2600 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2601 } 2602 PetscFunctionReturn(0); 2603 } 2604 2605 #undef __FUNCT__ 2606 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 2607 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2608 { 2609 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2610 PetscErrorCode ierr; 2611 2612 PetscFunctionBegin; 2613 /* TODO: add error checking 2614 - avoid nested pop (or push) calls. 2615 - cannot push before pop. 2616 - cannot call this if pcbddc->local_mat is NULL 2617 */ 2618 if (!pcbddc->benign_n) { 2619 PetscFunctionReturn(0); 2620 } 2621 if (pop) { 2622 if (pcbddc->benign_change_explicit) { 2623 IS is_p0; 2624 MatReuse reuse; 2625 2626 /* extract B_0 */ 2627 reuse = MAT_INITIAL_MATRIX; 2628 if (pcbddc->benign_B0) { 2629 reuse = MAT_REUSE_MATRIX; 2630 } 2631 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2632 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2633 /* remove rows and cols from local problem */ 2634 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2635 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2636 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2637 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2638 } else { 2639 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2640 PetscScalar *vals; 2641 PetscInt i,n,*idxs_ins; 2642 2643 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2644 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2645 if (!pcbddc->benign_B0) { 2646 PetscInt *nnz; 2647 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2648 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2649 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2650 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2651 for (i=0;i<pcbddc->benign_n;i++) { 2652 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2653 nnz[i] = n - nnz[i]; 2654 } 2655 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2656 ierr = PetscFree(nnz);CHKERRQ(ierr); 2657 } 2658 2659 for (i=0;i<pcbddc->benign_n;i++) { 2660 PetscScalar *array; 2661 PetscInt *idxs,j,nz,cum; 2662 2663 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2664 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2665 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2666 for (j=0;j<nz;j++) vals[j] = 1.; 2667 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2668 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2669 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2670 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2671 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2672 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2673 cum = 0; 2674 for (j=0;j<n;j++) { 2675 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2676 vals[cum] = array[j]; 2677 idxs_ins[cum] = j; 2678 cum++; 2679 } 2680 } 2681 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2682 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2683 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2684 } 2685 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2686 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2687 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2688 } 2689 } else { /* push */ 2690 if (pcbddc->benign_change_explicit) { 2691 PetscInt i; 2692 2693 for (i=0;i<pcbddc->benign_n;i++) { 2694 PetscScalar *B0_vals; 2695 PetscInt *B0_cols,B0_ncol; 2696 2697 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2698 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2699 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2700 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2701 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2702 } 2703 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2704 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2705 } else { 2706 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2707 } 2708 } 2709 PetscFunctionReturn(0); 2710 } 2711 2712 #undef __FUNCT__ 2713 #define __FUNCT__ "PCBDDCAdaptiveSelection" 2714 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2715 { 2716 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2717 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2718 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2719 PetscBLASInt *B_iwork,*B_ifail; 2720 PetscScalar *work,lwork; 2721 PetscScalar *St,*S,*eigv; 2722 PetscScalar *Sarray,*Starray; 2723 PetscReal *eigs,thresh; 2724 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2725 PetscBool allocated_S_St; 2726 #if defined(PETSC_USE_COMPLEX) 2727 PetscReal *rwork; 2728 #endif 2729 PetscErrorCode ierr; 2730 2731 PetscFunctionBegin; 2732 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2733 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2734 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2735 2736 if (pcbddc->dbg_flag) { 2737 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2738 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2739 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2740 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2741 } 2742 2743 if (pcbddc->dbg_flag) { 2744 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2745 } 2746 2747 /* max size of subsets */ 2748 mss = 0; 2749 for (i=0;i<sub_schurs->n_subs;i++) { 2750 PetscInt subset_size; 2751 2752 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2753 mss = PetscMax(mss,subset_size); 2754 } 2755 2756 /* min/max and threshold */ 2757 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2758 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2759 nmax = PetscMax(nmin,nmax); 2760 allocated_S_St = PETSC_FALSE; 2761 if (nmin) { 2762 allocated_S_St = PETSC_TRUE; 2763 } 2764 2765 /* allocate lapack workspace */ 2766 cum = cum2 = 0; 2767 maxneigs = 0; 2768 for (i=0;i<sub_schurs->n_subs;i++) { 2769 PetscInt n,subset_size; 2770 2771 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2772 n = PetscMin(subset_size,nmax); 2773 cum += subset_size; 2774 cum2 += subset_size*n; 2775 maxneigs = PetscMax(maxneigs,n); 2776 } 2777 if (mss) { 2778 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2779 PetscBLASInt B_itype = 1; 2780 PetscBLASInt B_N = mss; 2781 PetscReal zero = 0.0; 2782 PetscReal eps = 0.0; /* dlamch? */ 2783 2784 B_lwork = -1; 2785 S = NULL; 2786 St = NULL; 2787 eigs = NULL; 2788 eigv = NULL; 2789 B_iwork = NULL; 2790 B_ifail = NULL; 2791 #if defined(PETSC_USE_COMPLEX) 2792 rwork = NULL; 2793 #endif 2794 thresh = 1.0; 2795 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2796 #if defined(PETSC_USE_COMPLEX) 2797 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2798 #else 2799 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2800 #endif 2801 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2802 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2803 } else { 2804 /* TODO */ 2805 } 2806 } else { 2807 lwork = 0; 2808 } 2809 2810 nv = 0; 2811 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2812 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2813 } 2814 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2815 if (allocated_S_St) { 2816 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2817 } 2818 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2819 #if defined(PETSC_USE_COMPLEX) 2820 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2821 #endif 2822 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2823 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2824 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2825 nv+cum,&pcbddc->adaptive_constraints_idxs, 2826 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2827 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2828 2829 maxneigs = 0; 2830 cum = cumarray = 0; 2831 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2832 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2833 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2834 const PetscInt *idxs; 2835 2836 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2837 for (cum=0;cum<nv;cum++) { 2838 pcbddc->adaptive_constraints_n[cum] = 1; 2839 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2840 pcbddc->adaptive_constraints_data[cum] = 1.0; 2841 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2842 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2843 } 2844 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2845 } 2846 2847 if (mss) { /* multilevel */ 2848 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2849 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2850 } 2851 2852 thresh = pcbddc->adaptive_threshold; 2853 for (i=0;i<sub_schurs->n_subs;i++) { 2854 const PetscInt *idxs; 2855 PetscReal upper,lower; 2856 PetscInt j,subset_size,eigs_start = 0; 2857 PetscBLASInt B_N; 2858 PetscBool same_data = PETSC_FALSE; 2859 2860 if (pcbddc->use_deluxe_scaling) { 2861 upper = PETSC_MAX_REAL; 2862 lower = thresh; 2863 } else { 2864 upper = 1./thresh; 2865 lower = 0.; 2866 } 2867 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2868 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2869 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2870 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2871 if (sub_schurs->is_hermitian) { 2872 PetscInt j,k; 2873 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2874 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2875 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2876 } 2877 for (j=0;j<subset_size;j++) { 2878 for (k=j;k<subset_size;k++) { 2879 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2880 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2881 } 2882 } 2883 } else { 2884 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2885 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2886 } 2887 } else { 2888 S = Sarray + cumarray; 2889 St = Starray + cumarray; 2890 } 2891 /* see if we can save some work */ 2892 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2893 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2894 } 2895 2896 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2897 B_neigs = 0; 2898 } else { 2899 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2900 PetscBLASInt B_itype = 1; 2901 PetscBLASInt B_IL, B_IU; 2902 PetscReal eps = -1.0; /* dlamch? */ 2903 PetscInt nmin_s; 2904 PetscBool compute_range = PETSC_FALSE; 2905 2906 if (pcbddc->dbg_flag) { 2907 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]]); 2908 } 2909 2910 compute_range = PETSC_FALSE; 2911 if (thresh > 1.+PETSC_SMALL && !same_data) { 2912 compute_range = PETSC_TRUE; 2913 } 2914 2915 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2916 if (compute_range) { 2917 2918 /* ask for eigenvalues larger than thresh */ 2919 #if defined(PETSC_USE_COMPLEX) 2920 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2921 #else 2922 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2923 #endif 2924 } else if (!same_data) { 2925 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2926 B_IL = 1; 2927 #if defined(PETSC_USE_COMPLEX) 2928 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2929 #else 2930 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2931 #endif 2932 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 2933 PetscInt k; 2934 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2935 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2936 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2937 nmin = nmax; 2938 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2939 for (k=0;k<nmax;k++) { 2940 eigs[k] = 1./PETSC_SMALL; 2941 eigv[k*(subset_size+1)] = 1.0; 2942 } 2943 } 2944 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2945 if (B_ierr) { 2946 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2947 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2948 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2949 } 2950 2951 if (B_neigs > nmax) { 2952 if (pcbddc->dbg_flag) { 2953 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 2954 } 2955 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 2956 B_neigs = nmax; 2957 } 2958 2959 nmin_s = PetscMin(nmin,B_N); 2960 if (B_neigs < nmin_s) { 2961 PetscBLASInt B_neigs2; 2962 2963 if (pcbddc->use_deluxe_scaling) { 2964 B_IL = B_N - nmin_s + 1; 2965 B_IU = B_N - B_neigs; 2966 } else { 2967 B_IL = B_neigs + 1; 2968 B_IU = nmin_s; 2969 } 2970 if (pcbddc->dbg_flag) { 2971 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 2972 } 2973 if (sub_schurs->is_hermitian) { 2974 PetscInt j,k; 2975 for (j=0;j<subset_size;j++) { 2976 for (k=j;k<subset_size;k++) { 2977 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2978 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2979 } 2980 } 2981 } else { 2982 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2983 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2984 } 2985 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2986 #if defined(PETSC_USE_COMPLEX) 2987 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2988 #else 2989 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2990 #endif 2991 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2992 B_neigs += B_neigs2; 2993 } 2994 if (B_ierr) { 2995 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2996 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2997 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2998 } 2999 if (pcbddc->dbg_flag) { 3000 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3001 for (j=0;j<B_neigs;j++) { 3002 if (eigs[j] == 0.0) { 3003 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3004 } else { 3005 if (pcbddc->use_deluxe_scaling) { 3006 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3007 } else { 3008 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3009 } 3010 } 3011 } 3012 } 3013 } else { 3014 /* TODO */ 3015 } 3016 } 3017 /* change the basis back to the original one */ 3018 if (sub_schurs->change) { 3019 Mat change,phi,phit; 3020 3021 if (pcbddc->dbg_flag > 1) { 3022 PetscInt ii; 3023 for (ii=0;ii<B_neigs;ii++) { 3024 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3025 for (j=0;j<B_N;j++) { 3026 #if defined(PETSC_USE_COMPLEX) 3027 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3028 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3029 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3030 #else 3031 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3032 #endif 3033 } 3034 } 3035 } 3036 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3037 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3038 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3039 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3040 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3041 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3042 } 3043 maxneigs = PetscMax(B_neigs,maxneigs); 3044 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3045 if (B_neigs) { 3046 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3047 3048 if (pcbddc->dbg_flag > 1) { 3049 PetscInt ii; 3050 for (ii=0;ii<B_neigs;ii++) { 3051 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3052 for (j=0;j<B_N;j++) { 3053 #if defined(PETSC_USE_COMPLEX) 3054 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3055 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3056 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3057 #else 3058 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3059 #endif 3060 } 3061 } 3062 } 3063 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3064 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3065 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3066 cum++; 3067 } 3068 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3069 /* shift for next computation */ 3070 cumarray += subset_size*subset_size; 3071 } 3072 if (pcbddc->dbg_flag) { 3073 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3074 } 3075 3076 if (mss) { 3077 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3078 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3079 /* destroy matrices (junk) */ 3080 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3081 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3082 } 3083 if (allocated_S_St) { 3084 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3085 } 3086 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3087 #if defined(PETSC_USE_COMPLEX) 3088 ierr = PetscFree(rwork);CHKERRQ(ierr); 3089 #endif 3090 if (pcbddc->dbg_flag) { 3091 PetscInt maxneigs_r; 3092 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3093 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3094 } 3095 PetscFunctionReturn(0); 3096 } 3097 3098 #undef __FUNCT__ 3099 #define __FUNCT__ "PCBDDCSetUpSolvers" 3100 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3101 { 3102 PetscScalar *coarse_submat_vals; 3103 PetscErrorCode ierr; 3104 3105 PetscFunctionBegin; 3106 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3107 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3108 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3109 3110 /* Setup local neumann solver ksp_R */ 3111 /* PCBDDCSetUpLocalScatters should be called first! */ 3112 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3113 3114 /* 3115 Setup local correction and local part of coarse basis. 3116 Gives back the dense local part of the coarse matrix in column major ordering 3117 */ 3118 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3119 3120 /* Compute total number of coarse nodes and setup coarse solver */ 3121 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3122 3123 /* free */ 3124 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3125 PetscFunctionReturn(0); 3126 } 3127 3128 #undef __FUNCT__ 3129 #define __FUNCT__ "PCBDDCResetCustomization" 3130 PetscErrorCode PCBDDCResetCustomization(PC pc) 3131 { 3132 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3133 PetscErrorCode ierr; 3134 3135 PetscFunctionBegin; 3136 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3137 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3138 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3139 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3140 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3141 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3142 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3143 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3144 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3145 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3146 PetscFunctionReturn(0); 3147 } 3148 3149 #undef __FUNCT__ 3150 #define __FUNCT__ "PCBDDCResetTopography" 3151 PetscErrorCode PCBDDCResetTopography(PC pc) 3152 { 3153 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3154 PetscInt i; 3155 PetscErrorCode ierr; 3156 3157 PetscFunctionBegin; 3158 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3159 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3160 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3161 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3162 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3163 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3164 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3165 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3166 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3167 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3168 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3169 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3170 for (i=0;i<pcbddc->n_local_subs;i++) { 3171 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3172 } 3173 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3174 if (pcbddc->sub_schurs) { 3175 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 3176 } 3177 pcbddc->graphanalyzed = PETSC_FALSE; 3178 pcbddc->recompute_topography = PETSC_TRUE; 3179 PetscFunctionReturn(0); 3180 } 3181 3182 #undef __FUNCT__ 3183 #define __FUNCT__ "PCBDDCResetSolvers" 3184 PetscErrorCode PCBDDCResetSolvers(PC pc) 3185 { 3186 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3187 PetscErrorCode ierr; 3188 3189 PetscFunctionBegin; 3190 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3191 if (pcbddc->coarse_phi_B) { 3192 PetscScalar *array; 3193 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3194 ierr = PetscFree(array);CHKERRQ(ierr); 3195 } 3196 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3197 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3198 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3199 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3200 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3201 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3202 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3203 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3204 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3205 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3206 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3207 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3208 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3209 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3210 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 3211 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 3212 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 3213 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3214 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3215 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3216 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3217 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3218 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3219 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3220 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3221 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3222 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3223 if (pcbddc->benign_zerodiag_subs) { 3224 PetscInt i; 3225 for (i=0;i<pcbddc->benign_n;i++) { 3226 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3227 } 3228 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3229 } 3230 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3231 PetscFunctionReturn(0); 3232 } 3233 3234 #undef __FUNCT__ 3235 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 3236 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3237 { 3238 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3239 PC_IS *pcis = (PC_IS*)pc->data; 3240 VecType impVecType; 3241 PetscInt n_constraints,n_R,old_size; 3242 PetscErrorCode ierr; 3243 3244 PetscFunctionBegin; 3245 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3246 n_R = pcis->n - pcbddc->n_vertices; 3247 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3248 /* local work vectors (try to avoid unneeded work)*/ 3249 /* R nodes */ 3250 old_size = -1; 3251 if (pcbddc->vec1_R) { 3252 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3253 } 3254 if (n_R != old_size) { 3255 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3256 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3257 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3258 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3259 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3260 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3261 } 3262 /* local primal dofs */ 3263 old_size = -1; 3264 if (pcbddc->vec1_P) { 3265 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3266 } 3267 if (pcbddc->local_primal_size != old_size) { 3268 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3269 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3270 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3271 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3272 } 3273 /* local explicit constraints */ 3274 old_size = -1; 3275 if (pcbddc->vec1_C) { 3276 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3277 } 3278 if (n_constraints && n_constraints != old_size) { 3279 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3280 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3281 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3282 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3283 } 3284 PetscFunctionReturn(0); 3285 } 3286 3287 #undef __FUNCT__ 3288 #define __FUNCT__ "PCBDDCSetUpCorrection" 3289 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3290 { 3291 PetscErrorCode ierr; 3292 /* pointers to pcis and pcbddc */ 3293 PC_IS* pcis = (PC_IS*)pc->data; 3294 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3295 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3296 /* submatrices of local problem */ 3297 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3298 /* submatrices of local coarse problem */ 3299 Mat S_VV,S_CV,S_VC,S_CC; 3300 /* working matrices */ 3301 Mat C_CR; 3302 /* additional working stuff */ 3303 PC pc_R; 3304 Mat F; 3305 Vec dummy_vec; 3306 PetscBool isLU,isCHOL,isILU,need_benign_correction; 3307 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3308 PetscScalar *work; 3309 PetscInt *idx_V_B; 3310 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3311 PetscInt i,n_R,n_D,n_B; 3312 3313 /* some shortcuts to scalars */ 3314 PetscScalar one=1.0,m_one=-1.0; 3315 3316 PetscFunctionBegin; 3317 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3318 3319 /* Set Non-overlapping dimensions */ 3320 n_vertices = pcbddc->n_vertices; 3321 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3322 n_B = pcis->n_B; 3323 n_D = pcis->n - n_B; 3324 n_R = pcis->n - n_vertices; 3325 3326 /* vertices in boundary numbering */ 3327 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3328 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3329 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3330 3331 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3332 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3333 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3334 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3335 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3336 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3337 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3338 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3339 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3340 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3341 3342 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3343 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3344 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3345 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3346 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3347 lda_rhs = n_R; 3348 need_benign_correction = PETSC_FALSE; 3349 if (isLU || isILU || isCHOL) { 3350 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3351 } else if (sub_schurs && sub_schurs->reuse_solver) { 3352 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3353 MatFactorType type; 3354 3355 F = reuse_solver->F; 3356 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3357 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3358 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3359 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3360 } else { 3361 F = NULL; 3362 } 3363 3364 /* allocate workspace */ 3365 n = 0; 3366 if (n_constraints) { 3367 n += lda_rhs*n_constraints; 3368 } 3369 if (n_vertices) { 3370 n = PetscMax(2*lda_rhs*n_vertices,n); 3371 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3372 } 3373 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3374 3375 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3376 dummy_vec = NULL; 3377 if (need_benign_correction && lda_rhs != n_R && F) { 3378 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3379 } 3380 3381 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3382 if (n_constraints) { 3383 Mat M1,M2,M3,C_B; 3384 IS is_aux; 3385 PetscScalar *array,*array2; 3386 3387 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3388 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3389 3390 /* Extract constraints on R nodes: C_{CR} */ 3391 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3392 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3393 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3394 3395 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3396 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3397 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3398 for (i=0;i<n_constraints;i++) { 3399 const PetscScalar *row_cmat_values; 3400 const PetscInt *row_cmat_indices; 3401 PetscInt size_of_constraint,j; 3402 3403 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3404 for (j=0;j<size_of_constraint;j++) { 3405 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3406 } 3407 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3408 } 3409 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3410 if (F) { 3411 Mat B; 3412 3413 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3414 if (need_benign_correction) { 3415 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3416 3417 /* rhs is already zero on interior dofs, no need to change the rhs */ 3418 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3419 } 3420 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 3421 if (need_benign_correction) { 3422 PetscScalar *marr; 3423 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3424 3425 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3426 if (lda_rhs != n_R) { 3427 for (i=0;i<n_constraints;i++) { 3428 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3429 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3430 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3431 } 3432 } else { 3433 for (i=0;i<n_constraints;i++) { 3434 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3435 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3436 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3437 } 3438 } 3439 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3440 } 3441 ierr = MatDestroy(&B);CHKERRQ(ierr); 3442 } else { 3443 PetscScalar *marr; 3444 3445 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3446 for (i=0;i<n_constraints;i++) { 3447 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3448 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3449 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3450 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3451 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3452 } 3453 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3454 } 3455 if (!pcbddc->switch_static) { 3456 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3457 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3458 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3459 for (i=0;i<n_constraints;i++) { 3460 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3461 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3462 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3463 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3464 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3465 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3466 } 3467 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3468 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3469 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3470 } else { 3471 if (lda_rhs != n_R) { 3472 IS dummy; 3473 3474 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3475 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3476 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3477 } else { 3478 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3479 pcbddc->local_auxmat2 = local_auxmat2_R; 3480 } 3481 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3482 } 3483 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3484 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3485 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3486 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3487 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3488 if (isCHOL) { 3489 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3490 } else { 3491 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3492 } 3493 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3494 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3495 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3496 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3497 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3498 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3499 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3500 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3501 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3502 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3503 } 3504 3505 /* Get submatrices from subdomain matrix */ 3506 if (n_vertices) { 3507 IS is_aux; 3508 3509 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3510 IS tis; 3511 3512 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3513 ierr = ISSort(tis);CHKERRQ(ierr); 3514 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3515 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3516 } else { 3517 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3518 } 3519 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3520 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3521 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3522 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3523 } 3524 3525 /* Matrix of coarse basis functions (local) */ 3526 if (pcbddc->coarse_phi_B) { 3527 PetscInt on_B,on_primal,on_D=n_D; 3528 if (pcbddc->coarse_phi_D) { 3529 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3530 } 3531 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3532 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3533 PetscScalar *marray; 3534 3535 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3536 ierr = PetscFree(marray);CHKERRQ(ierr); 3537 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3538 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3539 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3540 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3541 } 3542 } 3543 3544 if (!pcbddc->coarse_phi_B) { 3545 PetscScalar *marray; 3546 3547 n = n_B*pcbddc->local_primal_size; 3548 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3549 n += n_D*pcbddc->local_primal_size; 3550 } 3551 if (!pcbddc->symmetric_primal) { 3552 n *= 2; 3553 } 3554 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 3555 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3556 n = n_B*pcbddc->local_primal_size; 3557 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3558 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3559 n += n_D*pcbddc->local_primal_size; 3560 } 3561 if (!pcbddc->symmetric_primal) { 3562 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3563 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3564 n = n_B*pcbddc->local_primal_size; 3565 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3566 } 3567 } else { 3568 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3569 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3570 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3571 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3572 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3573 } 3574 } 3575 } 3576 3577 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3578 p0_lidx_I = NULL; 3579 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3580 const PetscInt *idxs; 3581 3582 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3583 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3584 for (i=0;i<pcbddc->benign_n;i++) { 3585 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3586 } 3587 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3588 } 3589 3590 /* vertices */ 3591 if (n_vertices) { 3592 3593 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3594 3595 if (n_R) { 3596 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3597 PetscBLASInt B_N,B_one = 1; 3598 PetscScalar *x,*y; 3599 PetscBool isseqaij; 3600 3601 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3602 if (need_benign_correction) { 3603 ISLocalToGlobalMapping RtoN; 3604 IS is_p0; 3605 PetscInt *idxs_p0,n; 3606 3607 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3608 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3609 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3610 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3611 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3612 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3613 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3614 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3615 } 3616 3617 if (lda_rhs == n_R) { 3618 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3619 } else { 3620 PetscScalar *av,*array; 3621 const PetscInt *xadj,*adjncy; 3622 PetscInt n; 3623 PetscBool flg_row; 3624 3625 array = work+lda_rhs*n_vertices; 3626 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3627 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3628 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3629 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3630 for (i=0;i<n;i++) { 3631 PetscInt j; 3632 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3633 } 3634 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3635 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3636 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3637 } 3638 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3639 if (need_benign_correction) { 3640 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3641 PetscScalar *marr; 3642 3643 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3644 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3645 3646 | 0 0 0 | (V) 3647 L = | 0 0 -1 | (P-p0) 3648 | 0 0 -1 | (p0) 3649 3650 */ 3651 for (i=0;i<reuse_solver->benign_n;i++) { 3652 const PetscScalar *vals; 3653 const PetscInt *idxs,*idxs_zero; 3654 PetscInt n,j,nz; 3655 3656 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3657 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3658 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3659 for (j=0;j<n;j++) { 3660 PetscScalar val = vals[j]; 3661 PetscInt k,col = idxs[j]; 3662 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3663 } 3664 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3665 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3666 } 3667 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3668 } 3669 if (F) { 3670 /* need to correct the rhs */ 3671 if (need_benign_correction) { 3672 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3673 PetscScalar *marr; 3674 3675 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3676 if (lda_rhs != n_R) { 3677 for (i=0;i<n_vertices;i++) { 3678 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3679 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3680 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3681 } 3682 } else { 3683 for (i=0;i<n_vertices;i++) { 3684 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3685 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3686 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3687 } 3688 } 3689 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3690 } 3691 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3692 /* need to correct the solution */ 3693 if (need_benign_correction) { 3694 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3695 PetscScalar *marr; 3696 3697 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3698 if (lda_rhs != n_R) { 3699 for (i=0;i<n_vertices;i++) { 3700 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3701 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3702 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3703 } 3704 } else { 3705 for (i=0;i<n_vertices;i++) { 3706 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3707 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3708 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3709 } 3710 } 3711 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3712 } 3713 } else { 3714 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3715 for (i=0;i<n_vertices;i++) { 3716 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3717 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3718 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3719 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3720 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3721 } 3722 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3723 } 3724 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3725 /* S_VV and S_CV */ 3726 if (n_constraints) { 3727 Mat B; 3728 3729 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3730 for (i=0;i<n_vertices;i++) { 3731 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3732 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3733 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3734 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3735 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3736 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3737 } 3738 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3739 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3740 ierr = MatDestroy(&B);CHKERRQ(ierr); 3741 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3742 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3743 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3744 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3745 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3746 ierr = MatDestroy(&B);CHKERRQ(ierr); 3747 } 3748 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3749 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3750 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3751 } 3752 if (lda_rhs != n_R) { 3753 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3754 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3755 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3756 } 3757 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3758 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3759 if (need_benign_correction) { 3760 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3761 PetscScalar *marr,*sums; 3762 3763 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3764 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 3765 for (i=0;i<reuse_solver->benign_n;i++) { 3766 const PetscScalar *vals; 3767 const PetscInt *idxs,*idxs_zero; 3768 PetscInt n,j,nz; 3769 3770 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3771 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3772 for (j=0;j<n_vertices;j++) { 3773 PetscInt k; 3774 sums[j] = 0.; 3775 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3776 } 3777 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3778 for (j=0;j<n;j++) { 3779 PetscScalar val = vals[j]; 3780 PetscInt k; 3781 for (k=0;k<n_vertices;k++) { 3782 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3783 } 3784 } 3785 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3786 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3787 } 3788 ierr = PetscFree(sums);CHKERRQ(ierr); 3789 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 3790 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3791 } 3792 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3793 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3794 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3795 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3796 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3797 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3798 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3799 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3800 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3801 } else { 3802 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3803 } 3804 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3805 3806 /* coarse basis functions */ 3807 for (i=0;i<n_vertices;i++) { 3808 PetscScalar *y; 3809 3810 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3811 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3812 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3813 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3814 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3815 y[n_B*i+idx_V_B[i]] = 1.0; 3816 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3817 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3818 3819 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3820 PetscInt j; 3821 3822 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3823 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3824 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3825 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3826 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3827 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3828 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3829 } 3830 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3831 } 3832 /* if n_R == 0 the object is not destroyed */ 3833 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3834 } 3835 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3836 3837 if (n_constraints) { 3838 Mat B; 3839 3840 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3841 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3842 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3843 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3844 if (n_vertices) { 3845 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3846 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3847 } else { 3848 Mat S_VCt; 3849 3850 if (lda_rhs != n_R) { 3851 ierr = MatDestroy(&B);CHKERRQ(ierr); 3852 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3853 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3854 } 3855 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3856 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3857 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3858 } 3859 } 3860 ierr = MatDestroy(&B);CHKERRQ(ierr); 3861 /* coarse basis functions */ 3862 for (i=0;i<n_constraints;i++) { 3863 PetscScalar *y; 3864 3865 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3866 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3867 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3868 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3869 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3870 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3871 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3872 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3873 PetscInt j; 3874 3875 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3876 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3877 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3878 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3879 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3880 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3881 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3882 } 3883 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3884 } 3885 } 3886 if (n_constraints) { 3887 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3888 } 3889 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3890 3891 /* coarse matrix entries relative to B_0 */ 3892 if (pcbddc->benign_n) { 3893 Mat B0_B,B0_BPHI; 3894 IS is_dummy; 3895 PetscScalar *data; 3896 PetscInt j; 3897 3898 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3899 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3900 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3901 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3902 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3903 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3904 for (j=0;j<pcbddc->benign_n;j++) { 3905 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3906 for (i=0;i<pcbddc->local_primal_size;i++) { 3907 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3908 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3909 } 3910 } 3911 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3912 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3913 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3914 } 3915 3916 /* compute other basis functions for non-symmetric problems */ 3917 if (!pcbddc->symmetric_primal) { 3918 Mat B_V=NULL,B_C=NULL; 3919 PetscScalar *marray; 3920 3921 if (n_constraints) { 3922 Mat S_CCT,C_CRT; 3923 3924 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 3925 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 3926 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 3927 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 3928 if (n_vertices) { 3929 Mat S_VCT; 3930 3931 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 3932 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 3933 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 3934 } 3935 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 3936 } else { 3937 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 3938 } 3939 if (n_vertices && n_R) { 3940 PetscScalar *av,*marray; 3941 const PetscInt *xadj,*adjncy; 3942 PetscInt n; 3943 PetscBool flg_row; 3944 3945 /* B_V = B_V - A_VR^T */ 3946 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3947 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3948 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 3949 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3950 for (i=0;i<n;i++) { 3951 PetscInt j; 3952 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 3953 } 3954 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3955 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3956 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3957 } 3958 3959 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 3960 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3961 for (i=0;i<n_vertices;i++) { 3962 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 3963 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3964 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3965 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3966 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3967 } 3968 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3969 if (B_C) { 3970 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 3971 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 3972 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 3973 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3974 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3975 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3976 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3977 } 3978 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 3979 } 3980 /* coarse basis functions */ 3981 for (i=0;i<pcbddc->local_primal_size;i++) { 3982 PetscScalar *y; 3983 3984 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 3985 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3986 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3987 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3988 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3989 if (i<n_vertices) { 3990 y[n_B*i+idx_V_B[i]] = 1.0; 3991 } 3992 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3993 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3994 3995 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3996 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3997 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3998 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3999 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4000 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4001 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4002 } 4003 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4004 } 4005 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4006 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4007 } 4008 /* free memory */ 4009 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4010 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4011 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4012 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4013 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4014 ierr = PetscFree(work);CHKERRQ(ierr); 4015 if (n_vertices) { 4016 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4017 } 4018 if (n_constraints) { 4019 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4020 } 4021 /* Checking coarse_sub_mat and coarse basis functios */ 4022 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4023 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4024 if (pcbddc->dbg_flag) { 4025 Mat coarse_sub_mat; 4026 Mat AUXMAT,TM1,TM2,TM3,TM4; 4027 Mat coarse_phi_D,coarse_phi_B; 4028 Mat coarse_psi_D,coarse_psi_B; 4029 Mat A_II,A_BB,A_IB,A_BI; 4030 Mat C_B,CPHI; 4031 IS is_dummy; 4032 Vec mones; 4033 MatType checkmattype=MATSEQAIJ; 4034 PetscReal real_value; 4035 4036 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4037 Mat A; 4038 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4039 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4040 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4041 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4042 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4043 ierr = MatDestroy(&A);CHKERRQ(ierr); 4044 } else { 4045 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4046 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4047 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4048 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4049 } 4050 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4051 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4052 if (!pcbddc->symmetric_primal) { 4053 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4054 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4055 } 4056 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4057 4058 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4059 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4060 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4061 if (!pcbddc->symmetric_primal) { 4062 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4063 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4064 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4065 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4066 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4067 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4068 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4069 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4070 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4071 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4072 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4073 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4074 } else { 4075 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4076 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4077 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4078 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4079 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4080 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4081 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4082 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4083 } 4084 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4085 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4086 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4087 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4088 if (pcbddc->benign_n) { 4089 Mat B0_B,B0_BPHI; 4090 PetscScalar *data,*data2; 4091 PetscInt j; 4092 4093 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4094 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4095 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4096 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4097 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4098 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4099 for (j=0;j<pcbddc->benign_n;j++) { 4100 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4101 for (i=0;i<pcbddc->local_primal_size;i++) { 4102 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4103 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4104 } 4105 } 4106 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4107 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4108 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4109 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4110 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4111 } 4112 #if 0 4113 { 4114 PetscViewer viewer; 4115 char filename[256]; 4116 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4117 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4118 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4119 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4120 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4121 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4122 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4123 if (save_change) { 4124 Mat phi_B; 4125 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 4126 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 4127 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 4128 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 4129 } else { 4130 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4131 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4132 } 4133 if (pcbddc->coarse_phi_D) { 4134 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4135 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4136 } 4137 if (pcbddc->coarse_psi_B) { 4138 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4139 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4140 } 4141 if (pcbddc->coarse_psi_D) { 4142 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4143 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4144 } 4145 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4146 } 4147 #endif 4148 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4149 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4150 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4151 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4152 4153 /* check constraints */ 4154 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4155 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4156 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4157 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4158 } else { 4159 PetscScalar *data; 4160 Mat tmat; 4161 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4162 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4163 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4164 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4165 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4166 } 4167 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4168 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4169 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4170 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4171 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4172 if (!pcbddc->symmetric_primal) { 4173 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4174 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4175 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4176 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4177 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4178 } 4179 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4180 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4181 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4182 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4183 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4184 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4185 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4186 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4187 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4188 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4189 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4190 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4191 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4192 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4193 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4194 if (!pcbddc->symmetric_primal) { 4195 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4196 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4197 } 4198 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4199 } 4200 /* get back data */ 4201 *coarse_submat_vals_n = coarse_submat_vals; 4202 PetscFunctionReturn(0); 4203 } 4204 4205 #undef __FUNCT__ 4206 #define __FUNCT__ "MatGetSubMatrixUnsorted" 4207 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4208 { 4209 Mat *work_mat; 4210 IS isrow_s,iscol_s; 4211 PetscBool rsorted,csorted; 4212 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4213 PetscErrorCode ierr; 4214 4215 PetscFunctionBegin; 4216 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4217 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4218 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4219 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4220 4221 if (!rsorted) { 4222 const PetscInt *idxs; 4223 PetscInt *idxs_sorted,i; 4224 4225 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4226 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4227 for (i=0;i<rsize;i++) { 4228 idxs_perm_r[i] = i; 4229 } 4230 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4231 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4232 for (i=0;i<rsize;i++) { 4233 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4234 } 4235 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4236 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4237 } else { 4238 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4239 isrow_s = isrow; 4240 } 4241 4242 if (!csorted) { 4243 if (isrow == iscol) { 4244 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4245 iscol_s = isrow_s; 4246 } else { 4247 const PetscInt *idxs; 4248 PetscInt *idxs_sorted,i; 4249 4250 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4251 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4252 for (i=0;i<csize;i++) { 4253 idxs_perm_c[i] = i; 4254 } 4255 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4256 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4257 for (i=0;i<csize;i++) { 4258 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4259 } 4260 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4261 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4262 } 4263 } else { 4264 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4265 iscol_s = iscol; 4266 } 4267 4268 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4269 4270 if (!rsorted || !csorted) { 4271 Mat new_mat; 4272 IS is_perm_r,is_perm_c; 4273 4274 if (!rsorted) { 4275 PetscInt *idxs_r,i; 4276 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4277 for (i=0;i<rsize;i++) { 4278 idxs_r[idxs_perm_r[i]] = i; 4279 } 4280 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4281 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4282 } else { 4283 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4284 } 4285 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4286 4287 if (!csorted) { 4288 if (isrow_s == iscol_s) { 4289 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4290 is_perm_c = is_perm_r; 4291 } else { 4292 PetscInt *idxs_c,i; 4293 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4294 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4295 for (i=0;i<csize;i++) { 4296 idxs_c[idxs_perm_c[i]] = i; 4297 } 4298 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4299 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4300 } 4301 } else { 4302 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4303 } 4304 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4305 4306 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4307 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4308 work_mat[0] = new_mat; 4309 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4310 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4311 } 4312 4313 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4314 *B = work_mat[0]; 4315 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4316 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4317 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4318 PetscFunctionReturn(0); 4319 } 4320 4321 #undef __FUNCT__ 4322 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 4323 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4324 { 4325 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4326 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4327 Mat new_mat; 4328 IS is_local,is_global; 4329 PetscInt local_size; 4330 PetscBool isseqaij; 4331 PetscErrorCode ierr; 4332 4333 PetscFunctionBegin; 4334 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4335 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4336 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4337 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4338 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4339 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4340 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4341 4342 /* check */ 4343 if (pcbddc->dbg_flag) { 4344 Vec x,x_change; 4345 PetscReal error; 4346 4347 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4348 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4349 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4350 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4351 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4352 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4353 if (!pcbddc->change_interior) { 4354 const PetscScalar *x,*y,*v; 4355 PetscReal lerror = 0.; 4356 PetscInt i; 4357 4358 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4359 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4360 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4361 for (i=0;i<local_size;i++) 4362 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4363 lerror = PetscAbsScalar(x[i]-y[i]); 4364 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4365 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4366 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4367 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4368 if (error > PETSC_SMALL) { 4369 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4370 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4371 } else { 4372 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4373 } 4374 } 4375 } 4376 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4377 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4378 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4379 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4380 if (error > PETSC_SMALL) { 4381 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4382 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4383 } else { 4384 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4385 } 4386 } 4387 ierr = VecDestroy(&x);CHKERRQ(ierr); 4388 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4389 } 4390 4391 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4392 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4393 if (isseqaij) { 4394 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4395 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4396 } else { 4397 Mat work_mat; 4398 4399 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4400 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4401 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4402 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4403 } 4404 if (matis->A->symmetric_set) { 4405 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4406 #if !defined(PETSC_USE_COMPLEX) 4407 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4408 #endif 4409 } 4410 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4411 PetscFunctionReturn(0); 4412 } 4413 4414 #undef __FUNCT__ 4415 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 4416 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4417 { 4418 PC_IS* pcis = (PC_IS*)(pc->data); 4419 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4420 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4421 PetscInt *idx_R_local=NULL; 4422 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4423 PetscInt vbs,bs; 4424 PetscBT bitmask=NULL; 4425 PetscErrorCode ierr; 4426 4427 PetscFunctionBegin; 4428 /* 4429 No need to setup local scatters if 4430 - primal space is unchanged 4431 AND 4432 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4433 AND 4434 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4435 */ 4436 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4437 PetscFunctionReturn(0); 4438 } 4439 /* destroy old objects */ 4440 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4441 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4442 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4443 /* Set Non-overlapping dimensions */ 4444 n_B = pcis->n_B; 4445 n_D = pcis->n - n_B; 4446 n_vertices = pcbddc->n_vertices; 4447 4448 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4449 4450 /* create auxiliary bitmask and allocate workspace */ 4451 if (!sub_schurs || !sub_schurs->reuse_solver) { 4452 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4453 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4454 for (i=0;i<n_vertices;i++) { 4455 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4456 } 4457 4458 for (i=0, n_R=0; i<pcis->n; i++) { 4459 if (!PetscBTLookup(bitmask,i)) { 4460 idx_R_local[n_R++] = i; 4461 } 4462 } 4463 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4464 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4465 4466 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4467 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4468 } 4469 4470 /* Block code */ 4471 vbs = 1; 4472 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4473 if (bs>1 && !(n_vertices%bs)) { 4474 PetscBool is_blocked = PETSC_TRUE; 4475 PetscInt *vary; 4476 if (!sub_schurs || !sub_schurs->reuse_solver) { 4477 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4478 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4479 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4480 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4481 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4482 for (i=0; i<pcis->n/bs; i++) { 4483 if (vary[i]!=0 && vary[i]!=bs) { 4484 is_blocked = PETSC_FALSE; 4485 break; 4486 } 4487 } 4488 ierr = PetscFree(vary);CHKERRQ(ierr); 4489 } else { 4490 /* Verify directly the R set */ 4491 for (i=0; i<n_R/bs; i++) { 4492 PetscInt j,node=idx_R_local[bs*i]; 4493 for (j=1; j<bs; j++) { 4494 if (node != idx_R_local[bs*i+j]-j) { 4495 is_blocked = PETSC_FALSE; 4496 break; 4497 } 4498 } 4499 } 4500 } 4501 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4502 vbs = bs; 4503 for (i=0;i<n_R/vbs;i++) { 4504 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4505 } 4506 } 4507 } 4508 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4509 if (sub_schurs && sub_schurs->reuse_solver) { 4510 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4511 4512 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4513 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4514 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4515 reuse_solver->is_R = pcbddc->is_R_local; 4516 } else { 4517 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4518 } 4519 4520 /* print some info if requested */ 4521 if (pcbddc->dbg_flag) { 4522 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4523 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4524 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4525 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4526 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4527 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4528 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4529 } 4530 4531 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4532 if (!sub_schurs || !sub_schurs->reuse_solver) { 4533 IS is_aux1,is_aux2; 4534 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4535 4536 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4537 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4538 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4539 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4540 for (i=0; i<n_D; i++) { 4541 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4542 } 4543 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4544 for (i=0, j=0; i<n_R; i++) { 4545 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4546 aux_array1[j++] = i; 4547 } 4548 } 4549 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4550 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4551 for (i=0, j=0; i<n_B; i++) { 4552 if (!PetscBTLookup(bitmask,is_indices[i])) { 4553 aux_array2[j++] = i; 4554 } 4555 } 4556 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4557 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4558 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4559 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4560 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4561 4562 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4563 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4564 for (i=0, j=0; i<n_R; i++) { 4565 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4566 aux_array1[j++] = i; 4567 } 4568 } 4569 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4570 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4571 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4572 } 4573 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4574 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4575 } else { 4576 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4577 IS tis; 4578 PetscInt schur_size; 4579 4580 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4581 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4582 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4583 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4584 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4585 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4586 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4587 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4588 } 4589 } 4590 PetscFunctionReturn(0); 4591 } 4592 4593 4594 #undef __FUNCT__ 4595 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 4596 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4597 { 4598 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4599 PC_IS *pcis = (PC_IS*)pc->data; 4600 PC pc_temp; 4601 Mat A_RR; 4602 MatReuse reuse; 4603 PetscScalar m_one = -1.0; 4604 PetscReal value; 4605 PetscInt n_D,n_R; 4606 PetscBool check_corr[2],issbaij; 4607 PetscErrorCode ierr; 4608 /* prefixes stuff */ 4609 char dir_prefix[256],neu_prefix[256],str_level[16]; 4610 size_t len; 4611 4612 PetscFunctionBegin; 4613 4614 /* compute prefixes */ 4615 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4616 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4617 if (!pcbddc->current_level) { 4618 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4619 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4620 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4621 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4622 } else { 4623 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4624 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4625 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4626 len -= 15; /* remove "pc_bddc_coarse_" */ 4627 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4628 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4629 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4630 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4631 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4632 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4633 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4634 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4635 } 4636 4637 /* DIRICHLET PROBLEM */ 4638 if (dirichlet) { 4639 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4640 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4641 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4642 if (pcbddc->dbg_flag) { 4643 Mat A_IIn; 4644 4645 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4646 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4647 pcis->A_II = A_IIn; 4648 } 4649 } 4650 if (pcbddc->local_mat->symmetric_set) { 4651 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4652 } 4653 /* Matrix for Dirichlet problem is pcis->A_II */ 4654 n_D = pcis->n - pcis->n_B; 4655 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4656 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4657 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4658 /* default */ 4659 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4660 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4661 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4662 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4663 if (issbaij) { 4664 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4665 } else { 4666 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4667 } 4668 /* Allow user's customization */ 4669 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4670 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4671 } 4672 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4673 if (sub_schurs && sub_schurs->reuse_solver) { 4674 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4675 4676 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4677 } 4678 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4679 if (!n_D) { 4680 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4681 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4682 } 4683 /* Set Up KSP for Dirichlet problem of BDDC */ 4684 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4685 /* set ksp_D into pcis data */ 4686 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4687 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4688 pcis->ksp_D = pcbddc->ksp_D; 4689 } 4690 4691 /* NEUMANN PROBLEM */ 4692 A_RR = 0; 4693 if (neumann) { 4694 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4695 PetscInt ibs,mbs; 4696 PetscBool issbaij; 4697 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4698 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4699 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4700 if (pcbddc->ksp_R) { /* already created ksp */ 4701 PetscInt nn_R; 4702 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4703 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4704 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4705 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4706 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4707 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4708 reuse = MAT_INITIAL_MATRIX; 4709 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4710 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4711 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4712 reuse = MAT_INITIAL_MATRIX; 4713 } else { /* safe to reuse the matrix */ 4714 reuse = MAT_REUSE_MATRIX; 4715 } 4716 } 4717 /* last check */ 4718 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4719 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4720 reuse = MAT_INITIAL_MATRIX; 4721 } 4722 } else { /* first time, so we need to create the matrix */ 4723 reuse = MAT_INITIAL_MATRIX; 4724 } 4725 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4726 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4727 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4728 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4729 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4730 if (matis->A == pcbddc->local_mat) { 4731 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4732 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4733 } else { 4734 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4735 } 4736 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4737 if (matis->A == pcbddc->local_mat) { 4738 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4739 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4740 } else { 4741 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4742 } 4743 } 4744 /* extract A_RR */ 4745 if (sub_schurs && sub_schurs->reuse_solver) { 4746 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4747 4748 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4749 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4750 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4751 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4752 } else { 4753 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4754 } 4755 } else { 4756 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4757 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4758 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4759 } 4760 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4761 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4762 } 4763 if (pcbddc->local_mat->symmetric_set) { 4764 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4765 } 4766 if (!pcbddc->ksp_R) { /* create object if not present */ 4767 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4768 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4769 /* default */ 4770 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4771 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4772 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4773 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4774 if (issbaij) { 4775 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4776 } else { 4777 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4778 } 4779 /* Allow user's customization */ 4780 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4781 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4782 } 4783 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4784 if (!n_R) { 4785 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4786 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4787 } 4788 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4789 /* Reuse solver if it is present */ 4790 if (sub_schurs && sub_schurs->reuse_solver) { 4791 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4792 4793 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4794 } 4795 /* Set Up KSP for Neumann problem of BDDC */ 4796 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4797 } 4798 4799 if (pcbddc->dbg_flag) { 4800 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4801 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4802 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4803 } 4804 4805 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4806 check_corr[0] = check_corr[1] = PETSC_FALSE; 4807 if (pcbddc->NullSpace_corr[0]) { 4808 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4809 } 4810 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4811 check_corr[0] = PETSC_TRUE; 4812 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4813 } 4814 if (neumann && pcbddc->NullSpace_corr[2]) { 4815 check_corr[1] = PETSC_TRUE; 4816 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4817 } 4818 4819 /* check Dirichlet and Neumann solvers */ 4820 if (pcbddc->dbg_flag) { 4821 if (dirichlet) { /* Dirichlet */ 4822 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4823 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4824 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4825 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4826 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4827 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4828 if (check_corr[0]) { 4829 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4830 } 4831 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4832 } 4833 if (neumann) { /* Neumann */ 4834 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4835 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4836 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4837 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4838 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4839 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4840 if (check_corr[1]) { 4841 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4842 } 4843 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4844 } 4845 } 4846 /* free Neumann problem's matrix */ 4847 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4848 PetscFunctionReturn(0); 4849 } 4850 4851 #undef __FUNCT__ 4852 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 4853 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4854 { 4855 PetscErrorCode ierr; 4856 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4857 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4858 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4859 4860 PetscFunctionBegin; 4861 if (!reuse_solver) { 4862 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4863 } 4864 if (!pcbddc->switch_static) { 4865 if (applytranspose && pcbddc->local_auxmat1) { 4866 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4867 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4868 } 4869 if (!reuse_solver) { 4870 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4871 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4872 } else { 4873 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4874 4875 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4876 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4877 } 4878 } else { 4879 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4880 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4881 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4882 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4883 if (applytranspose && pcbddc->local_auxmat1) { 4884 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4885 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4886 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4887 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4888 } 4889 } 4890 if (!reuse_solver || pcbddc->switch_static) { 4891 if (applytranspose) { 4892 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4893 } else { 4894 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4895 } 4896 } else { 4897 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4898 4899 if (applytranspose) { 4900 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4901 } else { 4902 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4903 } 4904 } 4905 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4906 if (!pcbddc->switch_static) { 4907 if (!reuse_solver) { 4908 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4909 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4910 } else { 4911 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4912 4913 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4914 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4915 } 4916 if (!applytranspose && pcbddc->local_auxmat1) { 4917 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4918 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4919 } 4920 } else { 4921 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4922 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4923 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4924 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4925 if (!applytranspose && pcbddc->local_auxmat1) { 4926 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4927 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4928 } 4929 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4930 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4931 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4932 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4933 } 4934 PetscFunctionReturn(0); 4935 } 4936 4937 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 4938 #undef __FUNCT__ 4939 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 4940 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 4941 { 4942 PetscErrorCode ierr; 4943 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4944 PC_IS* pcis = (PC_IS*) (pc->data); 4945 const PetscScalar zero = 0.0; 4946 4947 PetscFunctionBegin; 4948 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 4949 if (!pcbddc->benign_apply_coarse_only) { 4950 if (applytranspose) { 4951 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4952 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4953 } else { 4954 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4955 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4956 } 4957 } else { 4958 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 4959 } 4960 4961 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 4962 if (pcbddc->benign_n) { 4963 PetscScalar *array; 4964 PetscInt j; 4965 4966 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4967 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 4968 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4969 } 4970 4971 /* start communications from local primal nodes to rhs of coarse solver */ 4972 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 4973 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4974 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4975 4976 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 4977 if (pcbddc->coarse_ksp) { 4978 Mat coarse_mat; 4979 Vec rhs,sol; 4980 MatNullSpace nullsp; 4981 PetscBool isbddc = PETSC_FALSE; 4982 4983 if (pcbddc->benign_have_null) { 4984 PC coarse_pc; 4985 4986 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4987 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4988 /* we need to propagate to coarser levels the need for a possible benign correction */ 4989 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 4990 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4991 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 4992 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 4993 } 4994 } 4995 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 4996 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 4997 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4998 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 4999 if (nullsp) { 5000 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5001 } 5002 if (applytranspose) { 5003 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5004 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5005 } else { 5006 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5007 PC coarse_pc; 5008 5009 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5010 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5011 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5012 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5013 } else { 5014 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5015 } 5016 } 5017 /* we don't need the benign correction at coarser levels anymore */ 5018 if (pcbddc->benign_have_null && isbddc) { 5019 PC coarse_pc; 5020 PC_BDDC* coarsepcbddc; 5021 5022 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5023 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5024 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5025 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5026 } 5027 if (nullsp) { 5028 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5029 } 5030 } 5031 5032 /* Local solution on R nodes */ 5033 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5034 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5035 } 5036 /* communications from coarse sol to local primal nodes */ 5037 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5038 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5039 5040 /* Sum contributions from the two levels */ 5041 if (!pcbddc->benign_apply_coarse_only) { 5042 if (applytranspose) { 5043 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5044 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5045 } else { 5046 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5047 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5048 } 5049 /* store p0 */ 5050 if (pcbddc->benign_n) { 5051 PetscScalar *array; 5052 PetscInt j; 5053 5054 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5055 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5056 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5057 } 5058 } else { /* expand the coarse solution */ 5059 if (applytranspose) { 5060 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5061 } else { 5062 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5063 } 5064 } 5065 PetscFunctionReturn(0); 5066 } 5067 5068 #undef __FUNCT__ 5069 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 5070 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5071 { 5072 PetscErrorCode ierr; 5073 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5074 PetscScalar *array; 5075 Vec from,to; 5076 5077 PetscFunctionBegin; 5078 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5079 from = pcbddc->coarse_vec; 5080 to = pcbddc->vec1_P; 5081 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5082 Vec tvec; 5083 5084 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5085 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5086 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5087 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5088 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5089 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5090 } 5091 } else { /* from local to global -> put data in coarse right hand side */ 5092 from = pcbddc->vec1_P; 5093 to = pcbddc->coarse_vec; 5094 } 5095 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5096 PetscFunctionReturn(0); 5097 } 5098 5099 #undef __FUNCT__ 5100 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 5101 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5102 { 5103 PetscErrorCode ierr; 5104 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5105 PetscScalar *array; 5106 Vec from,to; 5107 5108 PetscFunctionBegin; 5109 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5110 from = pcbddc->coarse_vec; 5111 to = pcbddc->vec1_P; 5112 } else { /* from local to global -> put data in coarse right hand side */ 5113 from = pcbddc->vec1_P; 5114 to = pcbddc->coarse_vec; 5115 } 5116 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5117 if (smode == SCATTER_FORWARD) { 5118 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5119 Vec tvec; 5120 5121 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5122 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5123 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5124 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5125 } 5126 } else { 5127 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5128 ierr = VecResetArray(from);CHKERRQ(ierr); 5129 } 5130 } 5131 PetscFunctionReturn(0); 5132 } 5133 5134 /* uncomment for testing purposes */ 5135 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5136 #undef __FUNCT__ 5137 #define __FUNCT__ "PCBDDCConstraintsSetUp" 5138 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5139 { 5140 PetscErrorCode ierr; 5141 PC_IS* pcis = (PC_IS*)(pc->data); 5142 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5143 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5144 /* one and zero */ 5145 PetscScalar one=1.0,zero=0.0; 5146 /* space to store constraints and their local indices */ 5147 PetscScalar *constraints_data; 5148 PetscInt *constraints_idxs,*constraints_idxs_B; 5149 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5150 PetscInt *constraints_n; 5151 /* iterators */ 5152 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5153 /* BLAS integers */ 5154 PetscBLASInt lwork,lierr; 5155 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5156 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5157 /* reuse */ 5158 PetscInt olocal_primal_size,olocal_primal_size_cc; 5159 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5160 /* change of basis */ 5161 PetscBool qr_needed; 5162 PetscBT change_basis,qr_needed_idx; 5163 /* auxiliary stuff */ 5164 PetscInt *nnz,*is_indices; 5165 PetscInt ncc; 5166 /* some quantities */ 5167 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5168 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5169 5170 PetscFunctionBegin; 5171 /* Destroy Mat objects computed previously */ 5172 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5173 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5174 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5175 /* save info on constraints from previous setup (if any) */ 5176 olocal_primal_size = pcbddc->local_primal_size; 5177 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5178 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5179 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5180 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5181 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5182 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5183 5184 if (!pcbddc->adaptive_selection) { 5185 IS ISForVertices,*ISForFaces,*ISForEdges; 5186 MatNullSpace nearnullsp; 5187 const Vec *nearnullvecs; 5188 Vec *localnearnullsp; 5189 PetscScalar *array; 5190 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5191 PetscBool nnsp_has_cnst; 5192 /* LAPACK working arrays for SVD or POD */ 5193 PetscBool skip_lapack,boolforchange; 5194 PetscScalar *work; 5195 PetscReal *singular_vals; 5196 #if defined(PETSC_USE_COMPLEX) 5197 PetscReal *rwork; 5198 #endif 5199 #if defined(PETSC_MISSING_LAPACK_GESVD) 5200 PetscScalar *temp_basis,*correlation_mat; 5201 #else 5202 PetscBLASInt dummy_int=1; 5203 PetscScalar dummy_scalar=1.; 5204 #endif 5205 5206 /* Get index sets for faces, edges and vertices from graph */ 5207 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5208 /* print some info */ 5209 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5210 PetscInt nv; 5211 5212 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5213 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5214 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5215 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5216 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5217 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5218 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5219 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5220 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5221 } 5222 5223 /* free unneeded index sets */ 5224 if (!pcbddc->use_vertices) { 5225 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5226 } 5227 if (!pcbddc->use_edges) { 5228 for (i=0;i<n_ISForEdges;i++) { 5229 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5230 } 5231 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5232 n_ISForEdges = 0; 5233 } 5234 if (!pcbddc->use_faces) { 5235 for (i=0;i<n_ISForFaces;i++) { 5236 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5237 } 5238 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5239 n_ISForFaces = 0; 5240 } 5241 5242 /* check if near null space is attached to global mat */ 5243 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5244 if (nearnullsp) { 5245 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5246 /* remove any stored info */ 5247 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5248 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5249 /* store information for BDDC solver reuse */ 5250 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5251 pcbddc->onearnullspace = nearnullsp; 5252 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5253 for (i=0;i<nnsp_size;i++) { 5254 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5255 } 5256 } else { /* if near null space is not provided BDDC uses constants by default */ 5257 nnsp_size = 0; 5258 nnsp_has_cnst = PETSC_TRUE; 5259 } 5260 /* get max number of constraints on a single cc */ 5261 max_constraints = nnsp_size; 5262 if (nnsp_has_cnst) max_constraints++; 5263 5264 /* 5265 Evaluate maximum storage size needed by the procedure 5266 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5267 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5268 There can be multiple constraints per connected component 5269 */ 5270 n_vertices = 0; 5271 if (ISForVertices) { 5272 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5273 } 5274 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5275 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5276 5277 total_counts = n_ISForFaces+n_ISForEdges; 5278 total_counts *= max_constraints; 5279 total_counts += n_vertices; 5280 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5281 5282 total_counts = 0; 5283 max_size_of_constraint = 0; 5284 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5285 IS used_is; 5286 if (i<n_ISForEdges) { 5287 used_is = ISForEdges[i]; 5288 } else { 5289 used_is = ISForFaces[i-n_ISForEdges]; 5290 } 5291 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5292 total_counts += j; 5293 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5294 } 5295 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5296 5297 /* get local part of global near null space vectors */ 5298 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5299 for (k=0;k<nnsp_size;k++) { 5300 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5301 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5302 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5303 } 5304 5305 /* whether or not to skip lapack calls */ 5306 skip_lapack = PETSC_TRUE; 5307 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5308 5309 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5310 if (!skip_lapack) { 5311 PetscScalar temp_work; 5312 5313 #if defined(PETSC_MISSING_LAPACK_GESVD) 5314 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5315 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5316 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5317 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5318 #if defined(PETSC_USE_COMPLEX) 5319 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5320 #endif 5321 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5322 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5323 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5324 lwork = -1; 5325 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5326 #if !defined(PETSC_USE_COMPLEX) 5327 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5328 #else 5329 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5330 #endif 5331 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5332 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5333 #else /* on missing GESVD */ 5334 /* SVD */ 5335 PetscInt max_n,min_n; 5336 max_n = max_size_of_constraint; 5337 min_n = max_constraints; 5338 if (max_size_of_constraint < max_constraints) { 5339 min_n = max_size_of_constraint; 5340 max_n = max_constraints; 5341 } 5342 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5343 #if defined(PETSC_USE_COMPLEX) 5344 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5345 #endif 5346 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5347 lwork = -1; 5348 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5349 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5350 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5351 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5352 #if !defined(PETSC_USE_COMPLEX) 5353 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5354 #else 5355 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5356 #endif 5357 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5358 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5359 #endif /* on missing GESVD */ 5360 /* Allocate optimal workspace */ 5361 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5362 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5363 } 5364 /* Now we can loop on constraining sets */ 5365 total_counts = 0; 5366 constraints_idxs_ptr[0] = 0; 5367 constraints_data_ptr[0] = 0; 5368 /* vertices */ 5369 if (n_vertices) { 5370 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5371 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5372 for (i=0;i<n_vertices;i++) { 5373 constraints_n[total_counts] = 1; 5374 constraints_data[total_counts] = 1.0; 5375 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5376 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5377 total_counts++; 5378 } 5379 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5380 n_vertices = total_counts; 5381 } 5382 5383 /* edges and faces */ 5384 total_counts_cc = total_counts; 5385 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5386 IS used_is; 5387 PetscBool idxs_copied = PETSC_FALSE; 5388 5389 if (ncc<n_ISForEdges) { 5390 used_is = ISForEdges[ncc]; 5391 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5392 } else { 5393 used_is = ISForFaces[ncc-n_ISForEdges]; 5394 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5395 } 5396 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5397 5398 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5399 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5400 /* change of basis should not be performed on local periodic nodes */ 5401 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5402 if (nnsp_has_cnst) { 5403 PetscScalar quad_value; 5404 5405 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5406 idxs_copied = PETSC_TRUE; 5407 5408 if (!pcbddc->use_nnsp_true) { 5409 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5410 } else { 5411 quad_value = 1.0; 5412 } 5413 for (j=0;j<size_of_constraint;j++) { 5414 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5415 } 5416 temp_constraints++; 5417 total_counts++; 5418 } 5419 for (k=0;k<nnsp_size;k++) { 5420 PetscReal real_value; 5421 PetscScalar *ptr_to_data; 5422 5423 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5424 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5425 for (j=0;j<size_of_constraint;j++) { 5426 ptr_to_data[j] = array[is_indices[j]]; 5427 } 5428 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5429 /* check if array is null on the connected component */ 5430 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5431 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5432 if (real_value > 0.0) { /* keep indices and values */ 5433 temp_constraints++; 5434 total_counts++; 5435 if (!idxs_copied) { 5436 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5437 idxs_copied = PETSC_TRUE; 5438 } 5439 } 5440 } 5441 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5442 valid_constraints = temp_constraints; 5443 if (!pcbddc->use_nnsp_true && temp_constraints) { 5444 if (temp_constraints == 1) { /* just normalize the constraint */ 5445 PetscScalar norm,*ptr_to_data; 5446 5447 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5448 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5449 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5450 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5451 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5452 } else { /* perform SVD */ 5453 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5454 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5455 5456 #if defined(PETSC_MISSING_LAPACK_GESVD) 5457 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5458 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5459 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5460 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5461 from that computed using LAPACKgesvd 5462 -> This is due to a different computation of eigenvectors in LAPACKheev 5463 -> The quality of the POD-computed basis will be the same */ 5464 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5465 /* Store upper triangular part of correlation matrix */ 5466 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5467 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5468 for (j=0;j<temp_constraints;j++) { 5469 for (k=0;k<j+1;k++) { 5470 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5471 } 5472 } 5473 /* compute eigenvalues and eigenvectors of correlation matrix */ 5474 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5475 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5476 #if !defined(PETSC_USE_COMPLEX) 5477 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5478 #else 5479 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5480 #endif 5481 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5482 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5483 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5484 j = 0; 5485 while (j < temp_constraints && singular_vals[j] < tol) j++; 5486 total_counts = total_counts-j; 5487 valid_constraints = temp_constraints-j; 5488 /* scale and copy POD basis into used quadrature memory */ 5489 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5490 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5491 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5492 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5493 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5494 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5495 if (j<temp_constraints) { 5496 PetscInt ii; 5497 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5498 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5499 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5500 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5501 for (k=0;k<temp_constraints-j;k++) { 5502 for (ii=0;ii<size_of_constraint;ii++) { 5503 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5504 } 5505 } 5506 } 5507 #else /* on missing GESVD */ 5508 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5509 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5510 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5511 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5512 #if !defined(PETSC_USE_COMPLEX) 5513 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5514 #else 5515 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5516 #endif 5517 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5518 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5519 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5520 k = temp_constraints; 5521 if (k > size_of_constraint) k = size_of_constraint; 5522 j = 0; 5523 while (j < k && singular_vals[k-j-1] < tol) j++; 5524 valid_constraints = k-j; 5525 total_counts = total_counts-temp_constraints+valid_constraints; 5526 #endif /* on missing GESVD */ 5527 } 5528 } 5529 /* update pointers information */ 5530 if (valid_constraints) { 5531 constraints_n[total_counts_cc] = valid_constraints; 5532 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5533 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5534 /* set change_of_basis flag */ 5535 if (boolforchange) { 5536 PetscBTSet(change_basis,total_counts_cc); 5537 } 5538 total_counts_cc++; 5539 } 5540 } 5541 /* free workspace */ 5542 if (!skip_lapack) { 5543 ierr = PetscFree(work);CHKERRQ(ierr); 5544 #if defined(PETSC_USE_COMPLEX) 5545 ierr = PetscFree(rwork);CHKERRQ(ierr); 5546 #endif 5547 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5548 #if defined(PETSC_MISSING_LAPACK_GESVD) 5549 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5550 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5551 #endif 5552 } 5553 for (k=0;k<nnsp_size;k++) { 5554 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5555 } 5556 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5557 /* free index sets of faces, edges and vertices */ 5558 for (i=0;i<n_ISForFaces;i++) { 5559 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5560 } 5561 if (n_ISForFaces) { 5562 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5563 } 5564 for (i=0;i<n_ISForEdges;i++) { 5565 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5566 } 5567 if (n_ISForEdges) { 5568 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5569 } 5570 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5571 } else { 5572 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5573 5574 total_counts = 0; 5575 n_vertices = 0; 5576 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5577 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5578 } 5579 max_constraints = 0; 5580 total_counts_cc = 0; 5581 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5582 total_counts += pcbddc->adaptive_constraints_n[i]; 5583 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5584 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5585 } 5586 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5587 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5588 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5589 constraints_data = pcbddc->adaptive_constraints_data; 5590 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5591 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5592 total_counts_cc = 0; 5593 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5594 if (pcbddc->adaptive_constraints_n[i]) { 5595 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5596 } 5597 } 5598 #if 0 5599 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5600 for (i=0;i<total_counts_cc;i++) { 5601 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5602 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5603 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5604 printf(" %d",constraints_idxs[j]); 5605 } 5606 printf("\n"); 5607 printf("number of cc: %d\n",constraints_n[i]); 5608 } 5609 for (i=0;i<n_vertices;i++) { 5610 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5611 } 5612 for (i=0;i<sub_schurs->n_subs;i++) { 5613 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5614 } 5615 #endif 5616 5617 max_size_of_constraint = 0; 5618 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5619 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5620 /* Change of basis */ 5621 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5622 if (pcbddc->use_change_of_basis) { 5623 for (i=0;i<sub_schurs->n_subs;i++) { 5624 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5625 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5626 } 5627 } 5628 } 5629 } 5630 pcbddc->local_primal_size = total_counts; 5631 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5632 5633 /* map constraints_idxs in boundary numbering */ 5634 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5635 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5636 5637 /* Create constraint matrix */ 5638 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5639 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5640 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5641 5642 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5643 /* determine if a QR strategy is needed for change of basis */ 5644 qr_needed = PETSC_FALSE; 5645 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5646 total_primal_vertices=0; 5647 pcbddc->local_primal_size_cc = 0; 5648 for (i=0;i<total_counts_cc;i++) { 5649 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5650 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5651 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5652 pcbddc->local_primal_size_cc += 1; 5653 } else if (PetscBTLookup(change_basis,i)) { 5654 for (k=0;k<constraints_n[i];k++) { 5655 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5656 } 5657 pcbddc->local_primal_size_cc += constraints_n[i]; 5658 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5659 PetscBTSet(qr_needed_idx,i); 5660 qr_needed = PETSC_TRUE; 5661 } 5662 } else { 5663 pcbddc->local_primal_size_cc += 1; 5664 } 5665 } 5666 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5667 pcbddc->n_vertices = total_primal_vertices; 5668 /* permute indices in order to have a sorted set of vertices */ 5669 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5670 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5671 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5672 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5673 5674 /* nonzero structure of constraint matrix */ 5675 /* and get reference dof for local constraints */ 5676 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5677 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5678 5679 j = total_primal_vertices; 5680 total_counts = total_primal_vertices; 5681 cum = total_primal_vertices; 5682 for (i=n_vertices;i<total_counts_cc;i++) { 5683 if (!PetscBTLookup(change_basis,i)) { 5684 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5685 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5686 cum++; 5687 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5688 for (k=0;k<constraints_n[i];k++) { 5689 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5690 nnz[j+k] = size_of_constraint; 5691 } 5692 j += constraints_n[i]; 5693 } 5694 } 5695 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5696 ierr = PetscFree(nnz);CHKERRQ(ierr); 5697 5698 /* set values in constraint matrix */ 5699 for (i=0;i<total_primal_vertices;i++) { 5700 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5701 } 5702 total_counts = total_primal_vertices; 5703 for (i=n_vertices;i<total_counts_cc;i++) { 5704 if (!PetscBTLookup(change_basis,i)) { 5705 PetscInt *cols; 5706 5707 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5708 cols = constraints_idxs+constraints_idxs_ptr[i]; 5709 for (k=0;k<constraints_n[i];k++) { 5710 PetscInt row = total_counts+k; 5711 PetscScalar *vals; 5712 5713 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5714 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5715 } 5716 total_counts += constraints_n[i]; 5717 } 5718 } 5719 /* assembling */ 5720 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5721 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5722 5723 /* 5724 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5725 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5726 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5727 */ 5728 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5729 if (pcbddc->use_change_of_basis) { 5730 /* dual and primal dofs on a single cc */ 5731 PetscInt dual_dofs,primal_dofs; 5732 /* working stuff for GEQRF */ 5733 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5734 PetscBLASInt lqr_work; 5735 /* working stuff for UNGQR */ 5736 PetscScalar *gqr_work,lgqr_work_t; 5737 PetscBLASInt lgqr_work; 5738 /* working stuff for TRTRS */ 5739 PetscScalar *trs_rhs; 5740 PetscBLASInt Blas_NRHS; 5741 /* pointers for values insertion into change of basis matrix */ 5742 PetscInt *start_rows,*start_cols; 5743 PetscScalar *start_vals; 5744 /* working stuff for values insertion */ 5745 PetscBT is_primal; 5746 PetscInt *aux_primal_numbering_B; 5747 /* matrix sizes */ 5748 PetscInt global_size,local_size; 5749 /* temporary change of basis */ 5750 Mat localChangeOfBasisMatrix; 5751 /* extra space for debugging */ 5752 PetscScalar *dbg_work; 5753 5754 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5755 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5756 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5757 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5758 /* nonzeros for local mat */ 5759 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5760 if (!pcbddc->benign_change || pcbddc->fake_change) { 5761 for (i=0;i<pcis->n;i++) nnz[i]=1; 5762 } else { 5763 const PetscInt *ii; 5764 PetscInt n; 5765 PetscBool flg_row; 5766 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5767 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5768 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5769 } 5770 for (i=n_vertices;i<total_counts_cc;i++) { 5771 if (PetscBTLookup(change_basis,i)) { 5772 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5773 if (PetscBTLookup(qr_needed_idx,i)) { 5774 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5775 } else { 5776 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5777 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5778 } 5779 } 5780 } 5781 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5782 ierr = PetscFree(nnz);CHKERRQ(ierr); 5783 /* Set interior change in the matrix */ 5784 if (!pcbddc->benign_change || pcbddc->fake_change) { 5785 for (i=0;i<pcis->n;i++) { 5786 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5787 } 5788 } else { 5789 const PetscInt *ii,*jj; 5790 PetscScalar *aa; 5791 PetscInt n; 5792 PetscBool flg_row; 5793 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5794 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5795 for (i=0;i<n;i++) { 5796 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5797 } 5798 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5799 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5800 } 5801 5802 if (pcbddc->dbg_flag) { 5803 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5804 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5805 } 5806 5807 5808 /* Now we loop on the constraints which need a change of basis */ 5809 /* 5810 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5811 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5812 5813 Basic blocks of change of basis matrix T computed by 5814 5815 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5816 5817 | 1 0 ... 0 s_1/S | 5818 | 0 1 ... 0 s_2/S | 5819 | ... | 5820 | 0 ... 1 s_{n-1}/S | 5821 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5822 5823 with S = \sum_{i=1}^n s_i^2 5824 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5825 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5826 5827 - QR decomposition of constraints otherwise 5828 */ 5829 if (qr_needed) { 5830 /* space to store Q */ 5831 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5832 /* array to store scaling factors for reflectors */ 5833 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5834 /* first we issue queries for optimal work */ 5835 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5836 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5837 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5838 lqr_work = -1; 5839 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5840 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5841 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5842 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5843 lgqr_work = -1; 5844 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5845 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5846 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5847 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5848 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5849 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5850 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5851 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5852 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5853 /* array to store rhs and solution of triangular solver */ 5854 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5855 /* allocating workspace for check */ 5856 if (pcbddc->dbg_flag) { 5857 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5858 } 5859 } 5860 /* array to store whether a node is primal or not */ 5861 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5862 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5863 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5864 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5865 for (i=0;i<total_primal_vertices;i++) { 5866 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5867 } 5868 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5869 5870 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5871 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5872 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5873 if (PetscBTLookup(change_basis,total_counts)) { 5874 /* get constraint info */ 5875 primal_dofs = constraints_n[total_counts]; 5876 dual_dofs = size_of_constraint-primal_dofs; 5877 5878 if (pcbddc->dbg_flag) { 5879 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5880 } 5881 5882 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5883 5884 /* copy quadrature constraints for change of basis check */ 5885 if (pcbddc->dbg_flag) { 5886 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5887 } 5888 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5889 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5890 5891 /* compute QR decomposition of constraints */ 5892 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5893 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5894 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5895 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5896 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5897 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5898 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5899 5900 /* explictly compute R^-T */ 5901 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5902 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5903 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5904 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5905 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5906 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5907 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5908 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5909 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5910 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5911 5912 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5913 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5914 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5915 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5916 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5917 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5918 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 5919 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 5920 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5921 5922 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 5923 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 5924 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 5925 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5926 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5927 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5928 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5929 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5930 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5931 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5932 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 5933 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5934 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5935 5936 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 5937 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 5938 /* insert cols for primal dofs */ 5939 for (j=0;j<primal_dofs;j++) { 5940 start_vals = &qr_basis[j*size_of_constraint]; 5941 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5942 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5943 } 5944 /* insert cols for dual dofs */ 5945 for (j=0,k=0;j<dual_dofs;k++) { 5946 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 5947 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 5948 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5949 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5950 j++; 5951 } 5952 } 5953 5954 /* check change of basis */ 5955 if (pcbddc->dbg_flag) { 5956 PetscInt ii,jj; 5957 PetscBool valid_qr=PETSC_TRUE; 5958 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 5959 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5960 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 5961 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5962 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 5963 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 5964 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5965 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 5966 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5967 for (jj=0;jj<size_of_constraint;jj++) { 5968 for (ii=0;ii<primal_dofs;ii++) { 5969 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 5970 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 5971 } 5972 } 5973 if (!valid_qr) { 5974 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 5975 for (jj=0;jj<size_of_constraint;jj++) { 5976 for (ii=0;ii<primal_dofs;ii++) { 5977 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 5978 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5979 } 5980 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 5981 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5982 } 5983 } 5984 } 5985 } else { 5986 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 5987 } 5988 } 5989 } else { /* simple transformation block */ 5990 PetscInt row,col; 5991 PetscScalar val,norm; 5992 5993 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5994 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 5995 for (j=0;j<size_of_constraint;j++) { 5996 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 5997 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5998 if (!PetscBTLookup(is_primal,row_B)) { 5999 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6000 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6001 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6002 } else { 6003 for (k=0;k<size_of_constraint;k++) { 6004 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6005 if (row != col) { 6006 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6007 } else { 6008 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6009 } 6010 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6011 } 6012 } 6013 } 6014 if (pcbddc->dbg_flag) { 6015 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6016 } 6017 } 6018 } else { 6019 if (pcbddc->dbg_flag) { 6020 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6021 } 6022 } 6023 } 6024 6025 /* free workspace */ 6026 if (qr_needed) { 6027 if (pcbddc->dbg_flag) { 6028 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6029 } 6030 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6031 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6032 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6033 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6034 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6035 } 6036 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6037 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6038 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6039 6040 /* assembling of global change of variable */ 6041 if (!pcbddc->fake_change) { 6042 Mat tmat; 6043 PetscInt bs; 6044 6045 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6046 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6047 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6048 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6049 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6050 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6051 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6052 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6053 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6054 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6055 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6056 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6057 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6058 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6059 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6060 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6061 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6062 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6063 6064 /* check */ 6065 if (pcbddc->dbg_flag) { 6066 PetscReal error; 6067 Vec x,x_change; 6068 6069 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6070 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6071 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6072 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6073 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6074 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6075 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6076 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6077 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6078 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6079 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6080 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6081 if (error > PETSC_SMALL) { 6082 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6083 } 6084 ierr = VecDestroy(&x);CHKERRQ(ierr); 6085 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6086 } 6087 /* adapt sub_schurs computed (if any) */ 6088 if (pcbddc->use_deluxe_scaling) { 6089 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6090 6091 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 6092 if (sub_schurs && sub_schurs->S_Ej_all) { 6093 Mat S_new,tmat; 6094 IS is_all_N,is_V_Sall = NULL; 6095 6096 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6097 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6098 if (pcbddc->deluxe_zerorows) { 6099 ISLocalToGlobalMapping NtoSall; 6100 IS is_V; 6101 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6102 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6103 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6104 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6105 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6106 } 6107 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6108 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6109 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6110 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6111 if (pcbddc->deluxe_zerorows) { 6112 const PetscScalar *array; 6113 const PetscInt *idxs_V,*idxs_all; 6114 PetscInt i,n_V; 6115 6116 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6117 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6118 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6119 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6120 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6121 for (i=0;i<n_V;i++) { 6122 PetscScalar val; 6123 PetscInt idx; 6124 6125 idx = idxs_V[i]; 6126 val = array[idxs_all[idxs_V[i]]]; 6127 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6128 } 6129 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6130 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6131 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6132 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6133 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6134 } 6135 sub_schurs->S_Ej_all = S_new; 6136 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6137 if (sub_schurs->sum_S_Ej_all) { 6138 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6139 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6140 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6141 if (pcbddc->deluxe_zerorows) { 6142 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6143 } 6144 sub_schurs->sum_S_Ej_all = S_new; 6145 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6146 } 6147 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6148 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6149 } 6150 /* destroy any change of basis context in sub_schurs */ 6151 if (sub_schurs && sub_schurs->change) { 6152 PetscInt i; 6153 6154 for (i=0;i<sub_schurs->n_subs;i++) { 6155 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6156 } 6157 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6158 } 6159 } 6160 if (pcbddc->switch_static) { /* need to save the local change */ 6161 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6162 } else { 6163 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6164 } 6165 /* determine if any process has changed the pressures locally */ 6166 pcbddc->change_interior = pcbddc->benign_have_null; 6167 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6168 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6169 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6170 pcbddc->use_qr_single = qr_needed; 6171 } 6172 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6173 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6174 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6175 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6176 } else { 6177 Mat benign_global = NULL; 6178 if (pcbddc->benign_have_null) { 6179 Mat tmat; 6180 6181 pcbddc->change_interior = PETSC_TRUE; 6182 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6183 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6184 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6185 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6186 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6187 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6188 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6189 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6190 if (pcbddc->benign_change) { 6191 Mat M; 6192 6193 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6194 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6195 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6196 ierr = MatDestroy(&M);CHKERRQ(ierr); 6197 } else { 6198 Mat eye; 6199 PetscScalar *array; 6200 6201 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6202 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6203 for (i=0;i<pcis->n;i++) { 6204 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6205 } 6206 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6207 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6208 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6209 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6210 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6211 } 6212 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6213 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6214 } 6215 if (pcbddc->user_ChangeOfBasisMatrix) { 6216 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6217 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6218 } else if (pcbddc->benign_have_null) { 6219 pcbddc->ChangeOfBasisMatrix = benign_global; 6220 } 6221 } 6222 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6223 IS is_global; 6224 const PetscInt *gidxs; 6225 6226 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6227 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6228 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6229 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6230 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6231 } 6232 } 6233 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6234 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6235 } 6236 6237 if (!pcbddc->fake_change) { 6238 /* add pressure dofs to set of primal nodes for numbering purposes */ 6239 for (i=0;i<pcbddc->benign_n;i++) { 6240 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6241 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6242 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6243 pcbddc->local_primal_size_cc++; 6244 pcbddc->local_primal_size++; 6245 } 6246 6247 /* check if a new primal space has been introduced (also take into account benign trick) */ 6248 pcbddc->new_primal_space_local = PETSC_TRUE; 6249 if (olocal_primal_size == pcbddc->local_primal_size) { 6250 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6251 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6252 if (!pcbddc->new_primal_space_local) { 6253 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6254 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6255 } 6256 } 6257 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6258 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6259 } 6260 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6261 6262 /* flush dbg viewer */ 6263 if (pcbddc->dbg_flag) { 6264 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6265 } 6266 6267 /* free workspace */ 6268 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6269 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6270 if (!pcbddc->adaptive_selection) { 6271 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6272 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6273 } else { 6274 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6275 pcbddc->adaptive_constraints_idxs_ptr, 6276 pcbddc->adaptive_constraints_data_ptr, 6277 pcbddc->adaptive_constraints_idxs, 6278 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6279 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6280 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6281 } 6282 PetscFunctionReturn(0); 6283 } 6284 6285 #undef __FUNCT__ 6286 #define __FUNCT__ "PCBDDCAnalyzeInterface" 6287 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6288 { 6289 ISLocalToGlobalMapping map; 6290 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6291 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6292 PetscInt i,N; 6293 PetscBool rcsr = PETSC_FALSE; 6294 PetscErrorCode ierr; 6295 6296 PetscFunctionBegin; 6297 if (pcbddc->recompute_topography) { 6298 pcbddc->graphanalyzed = PETSC_FALSE; 6299 /* Reset previously computed graph */ 6300 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6301 /* Init local Graph struct */ 6302 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6303 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6304 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6305 6306 /* Check validity of the csr graph passed in by the user */ 6307 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6308 6309 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6310 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6311 PetscInt *xadj,*adjncy; 6312 PetscInt nvtxs; 6313 PetscBool flg_row=PETSC_FALSE; 6314 6315 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6316 if (flg_row) { 6317 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6318 pcbddc->computed_rowadj = PETSC_TRUE; 6319 } 6320 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6321 rcsr = PETSC_TRUE; 6322 } 6323 if (pcbddc->dbg_flag) { 6324 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6325 } 6326 6327 /* Setup of Graph */ 6328 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6329 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6330 6331 /* attach info on disconnected subdomains if present */ 6332 if (pcbddc->n_local_subs) { 6333 PetscInt *local_subs; 6334 6335 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6336 for (i=0;i<pcbddc->n_local_subs;i++) { 6337 const PetscInt *idxs; 6338 PetscInt nl,j; 6339 6340 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6341 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6342 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6343 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6344 } 6345 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6346 pcbddc->mat_graph->local_subs = local_subs; 6347 } 6348 } 6349 6350 if (!pcbddc->graphanalyzed) { 6351 /* Graph's connected components analysis */ 6352 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6353 pcbddc->graphanalyzed = PETSC_TRUE; 6354 } 6355 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6356 PetscFunctionReturn(0); 6357 } 6358 6359 #undef __FUNCT__ 6360 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 6361 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6362 { 6363 PetscInt i,j; 6364 PetscScalar *alphas; 6365 PetscErrorCode ierr; 6366 6367 PetscFunctionBegin; 6368 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6369 for (i=0;i<n;i++) { 6370 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6371 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6372 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6373 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6374 } 6375 ierr = PetscFree(alphas);CHKERRQ(ierr); 6376 PetscFunctionReturn(0); 6377 } 6378 6379 #undef __FUNCT__ 6380 #define __FUNCT__ "PCBDDCMatISGetSubassemblingPattern" 6381 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6382 { 6383 Mat A; 6384 PetscInt n_neighs,*neighs,*n_shared,**shared; 6385 PetscMPIInt size,rank,color; 6386 PetscInt *xadj,*adjncy; 6387 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6388 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 6389 PetscInt void_procs,*procs_candidates = NULL; 6390 PetscInt xadj_count,*count; 6391 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6392 PetscSubcomm psubcomm; 6393 MPI_Comm subcomm; 6394 PetscErrorCode ierr; 6395 6396 PetscFunctionBegin; 6397 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6398 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6399 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6400 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6401 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6402 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6403 6404 if (have_void) *have_void = PETSC_FALSE; 6405 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6406 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6407 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6408 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6409 im_active = !!n; 6410 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6411 void_procs = size - active_procs; 6412 /* get ranks of of non-active processes in mat communicator */ 6413 if (void_procs) { 6414 PetscInt ncand; 6415 6416 if (have_void) *have_void = PETSC_TRUE; 6417 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6418 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6419 for (i=0,ncand=0;i<size;i++) { 6420 if (!procs_candidates[i]) { 6421 procs_candidates[ncand++] = i; 6422 } 6423 } 6424 /* force n_subdomains to be not greater that the number of non-active processes */ 6425 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6426 } 6427 6428 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 6429 number of subdomains requested 1 -> send to master or first candidate in voids */ 6430 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 6431 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 6432 PetscInt issize,isidx,dest; 6433 if (*n_subdomains == 1) dest = 0; 6434 else dest = rank; 6435 if (im_active) { 6436 issize = 1; 6437 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6438 isidx = procs_candidates[dest]; 6439 } else { 6440 isidx = dest; 6441 } 6442 } else { 6443 issize = 0; 6444 isidx = -1; 6445 } 6446 if (*n_subdomains != 1) *n_subdomains = active_procs; 6447 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6448 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6449 PetscFunctionReturn(0); 6450 } 6451 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6452 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6453 threshold = PetscMax(threshold,2); 6454 6455 /* Get info on mapping */ 6456 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6457 6458 /* build local CSR graph of subdomains' connectivity */ 6459 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6460 xadj[0] = 0; 6461 xadj[1] = PetscMax(n_neighs-1,0); 6462 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6463 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6464 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 6465 for (i=1;i<n_neighs;i++) 6466 for (j=0;j<n_shared[i];j++) 6467 count[shared[i][j]] += 1; 6468 6469 xadj_count = 0; 6470 for (i=1;i<n_neighs;i++) { 6471 for (j=0;j<n_shared[i];j++) { 6472 if (count[shared[i][j]] < threshold) { 6473 adjncy[xadj_count] = neighs[i]; 6474 adjncy_wgt[xadj_count] = n_shared[i]; 6475 xadj_count++; 6476 break; 6477 } 6478 } 6479 } 6480 xadj[1] = xadj_count; 6481 ierr = PetscFree(count);CHKERRQ(ierr); 6482 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6483 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6484 6485 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6486 6487 /* Restrict work on active processes only */ 6488 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6489 if (void_procs) { 6490 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6491 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6492 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6493 subcomm = PetscSubcommChild(psubcomm); 6494 } else { 6495 psubcomm = NULL; 6496 subcomm = PetscObjectComm((PetscObject)mat); 6497 } 6498 6499 v_wgt = NULL; 6500 if (!color) { 6501 ierr = PetscFree(xadj);CHKERRQ(ierr); 6502 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6503 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6504 } else { 6505 Mat subdomain_adj; 6506 IS new_ranks,new_ranks_contig; 6507 MatPartitioning partitioner; 6508 PetscInt rstart=0,rend=0; 6509 PetscInt *is_indices,*oldranks; 6510 PetscMPIInt size; 6511 PetscBool aggregate; 6512 6513 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6514 if (void_procs) { 6515 PetscInt prank = rank; 6516 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6517 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6518 for (i=0;i<xadj[1];i++) { 6519 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6520 } 6521 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6522 } else { 6523 oldranks = NULL; 6524 } 6525 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6526 if (aggregate) { /* TODO: all this part could be made more efficient */ 6527 PetscInt lrows,row,ncols,*cols; 6528 PetscMPIInt nrank; 6529 PetscScalar *vals; 6530 6531 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6532 lrows = 0; 6533 if (nrank<redprocs) { 6534 lrows = size/redprocs; 6535 if (nrank<size%redprocs) lrows++; 6536 } 6537 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6538 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6539 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6540 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6541 row = nrank; 6542 ncols = xadj[1]-xadj[0]; 6543 cols = adjncy; 6544 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6545 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6546 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6547 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6548 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6549 ierr = PetscFree(xadj);CHKERRQ(ierr); 6550 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6551 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6552 ierr = PetscFree(vals);CHKERRQ(ierr); 6553 if (use_vwgt) { 6554 Vec v; 6555 const PetscScalar *array; 6556 PetscInt nl; 6557 6558 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6559 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 6560 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6561 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6562 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6563 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6564 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6565 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6566 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6567 ierr = VecDestroy(&v);CHKERRQ(ierr); 6568 } 6569 } else { 6570 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6571 if (use_vwgt) { 6572 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6573 v_wgt[0] = n; 6574 } 6575 } 6576 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6577 6578 /* Partition */ 6579 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6580 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6581 if (v_wgt) { 6582 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6583 } 6584 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6585 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6586 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6587 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6588 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6589 6590 /* renumber new_ranks to avoid "holes" in new set of processors */ 6591 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6592 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6593 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6594 if (!aggregate) { 6595 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6596 #if defined(PETSC_USE_DEBUG) 6597 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6598 #endif 6599 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6600 } else if (oldranks) { 6601 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6602 } else { 6603 ranks_send_to_idx[0] = is_indices[0]; 6604 } 6605 } else { 6606 PetscInt idxs[1]; 6607 PetscMPIInt tag; 6608 MPI_Request *reqs; 6609 6610 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6611 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6612 for (i=rstart;i<rend;i++) { 6613 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6614 } 6615 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6616 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6617 ierr = PetscFree(reqs);CHKERRQ(ierr); 6618 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6619 #if defined(PETSC_USE_DEBUG) 6620 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6621 #endif 6622 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6623 } else if (oldranks) { 6624 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6625 } else { 6626 ranks_send_to_idx[0] = idxs[0]; 6627 } 6628 } 6629 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6630 /* clean up */ 6631 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6632 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6633 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6634 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6635 } 6636 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6637 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6638 6639 /* assemble parallel IS for sends */ 6640 i = 1; 6641 if (!color) i=0; 6642 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6643 PetscFunctionReturn(0); 6644 } 6645 6646 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6647 6648 #undef __FUNCT__ 6649 #define __FUNCT__ "PCBDDCMatISSubassemble" 6650 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6651 { 6652 Mat local_mat; 6653 IS is_sends_internal; 6654 PetscInt rows,cols,new_local_rows; 6655 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6656 PetscBool ismatis,isdense,newisdense,destroy_mat; 6657 ISLocalToGlobalMapping l2gmap; 6658 PetscInt* l2gmap_indices; 6659 const PetscInt* is_indices; 6660 MatType new_local_type; 6661 /* buffers */ 6662 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6663 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6664 PetscInt *recv_buffer_idxs_local; 6665 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6666 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6667 /* MPI */ 6668 MPI_Comm comm,comm_n; 6669 PetscSubcomm subcomm; 6670 PetscMPIInt n_sends,n_recvs,commsize; 6671 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6672 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6673 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6674 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6675 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6676 PetscErrorCode ierr; 6677 6678 PetscFunctionBegin; 6679 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6680 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6681 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6682 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6683 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6684 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6685 PetscValidLogicalCollectiveBool(mat,reuse,6); 6686 PetscValidLogicalCollectiveInt(mat,nis,8); 6687 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6688 if (nvecs) { 6689 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6690 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6691 } 6692 /* further checks */ 6693 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6694 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6695 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6696 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6697 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6698 if (reuse && *mat_n) { 6699 PetscInt mrows,mcols,mnrows,mncols; 6700 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6701 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6702 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6703 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6704 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6705 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6706 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6707 } 6708 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6709 PetscValidLogicalCollectiveInt(mat,bs,0); 6710 6711 /* prepare IS for sending if not provided */ 6712 if (!is_sends) { 6713 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6714 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6715 } else { 6716 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6717 is_sends_internal = is_sends; 6718 } 6719 6720 /* get comm */ 6721 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6722 6723 /* compute number of sends */ 6724 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6725 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6726 6727 /* compute number of receives */ 6728 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6729 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6730 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6731 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6732 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6733 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6734 ierr = PetscFree(iflags);CHKERRQ(ierr); 6735 6736 /* restrict comm if requested */ 6737 subcomm = 0; 6738 destroy_mat = PETSC_FALSE; 6739 if (restrict_comm) { 6740 PetscMPIInt color,subcommsize; 6741 6742 color = 0; 6743 if (restrict_full) { 6744 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6745 } else { 6746 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6747 } 6748 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6749 subcommsize = commsize - subcommsize; 6750 /* check if reuse has been requested */ 6751 if (reuse) { 6752 if (*mat_n) { 6753 PetscMPIInt subcommsize2; 6754 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6755 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6756 comm_n = PetscObjectComm((PetscObject)*mat_n); 6757 } else { 6758 comm_n = PETSC_COMM_SELF; 6759 } 6760 } else { /* MAT_INITIAL_MATRIX */ 6761 PetscMPIInt rank; 6762 6763 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6764 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6765 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6766 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6767 comm_n = PetscSubcommChild(subcomm); 6768 } 6769 /* flag to destroy *mat_n if not significative */ 6770 if (color) destroy_mat = PETSC_TRUE; 6771 } else { 6772 comm_n = comm; 6773 } 6774 6775 /* prepare send/receive buffers */ 6776 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6777 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6778 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6779 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6780 if (nis) { 6781 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6782 } 6783 6784 /* Get data from local matrices */ 6785 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6786 /* TODO: See below some guidelines on how to prepare the local buffers */ 6787 /* 6788 send_buffer_vals should contain the raw values of the local matrix 6789 send_buffer_idxs should contain: 6790 - MatType_PRIVATE type 6791 - PetscInt size_of_l2gmap 6792 - PetscInt global_row_indices[size_of_l2gmap] 6793 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6794 */ 6795 else { 6796 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6797 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6798 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6799 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6800 send_buffer_idxs[1] = i; 6801 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6802 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6803 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6804 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6805 for (i=0;i<n_sends;i++) { 6806 ilengths_vals[is_indices[i]] = len*len; 6807 ilengths_idxs[is_indices[i]] = len+2; 6808 } 6809 } 6810 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6811 /* additional is (if any) */ 6812 if (nis) { 6813 PetscMPIInt psum; 6814 PetscInt j; 6815 for (j=0,psum=0;j<nis;j++) { 6816 PetscInt plen; 6817 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6818 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6819 psum += len+1; /* indices + lenght */ 6820 } 6821 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6822 for (j=0,psum=0;j<nis;j++) { 6823 PetscInt plen; 6824 const PetscInt *is_array_idxs; 6825 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6826 send_buffer_idxs_is[psum] = plen; 6827 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6828 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6829 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6830 psum += plen+1; /* indices + lenght */ 6831 } 6832 for (i=0;i<n_sends;i++) { 6833 ilengths_idxs_is[is_indices[i]] = psum; 6834 } 6835 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6836 } 6837 6838 buf_size_idxs = 0; 6839 buf_size_vals = 0; 6840 buf_size_idxs_is = 0; 6841 buf_size_vecs = 0; 6842 for (i=0;i<n_recvs;i++) { 6843 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6844 buf_size_vals += (PetscInt)olengths_vals[i]; 6845 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6846 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6847 } 6848 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6849 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6850 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6851 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6852 6853 /* get new tags for clean communications */ 6854 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6855 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6856 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6857 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6858 6859 /* allocate for requests */ 6860 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6861 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6862 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6863 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6864 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6865 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6866 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6867 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6868 6869 /* communications */ 6870 ptr_idxs = recv_buffer_idxs; 6871 ptr_vals = recv_buffer_vals; 6872 ptr_idxs_is = recv_buffer_idxs_is; 6873 ptr_vecs = recv_buffer_vecs; 6874 for (i=0;i<n_recvs;i++) { 6875 source_dest = onodes[i]; 6876 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6877 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6878 ptr_idxs += olengths_idxs[i]; 6879 ptr_vals += olengths_vals[i]; 6880 if (nis) { 6881 source_dest = onodes_is[i]; 6882 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6883 ptr_idxs_is += olengths_idxs_is[i]; 6884 } 6885 if (nvecs) { 6886 source_dest = onodes[i]; 6887 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6888 ptr_vecs += olengths_idxs[i]-2; 6889 } 6890 } 6891 for (i=0;i<n_sends;i++) { 6892 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6893 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6894 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6895 if (nis) { 6896 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6897 } 6898 if (nvecs) { 6899 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6900 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6901 } 6902 } 6903 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6904 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6905 6906 /* assemble new l2g map */ 6907 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6908 ptr_idxs = recv_buffer_idxs; 6909 new_local_rows = 0; 6910 for (i=0;i<n_recvs;i++) { 6911 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6912 ptr_idxs += olengths_idxs[i]; 6913 } 6914 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6915 ptr_idxs = recv_buffer_idxs; 6916 new_local_rows = 0; 6917 for (i=0;i<n_recvs;i++) { 6918 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6919 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6920 ptr_idxs += olengths_idxs[i]; 6921 } 6922 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 6923 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 6924 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 6925 6926 /* infer new local matrix type from received local matrices type */ 6927 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 6928 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 6929 if (n_recvs) { 6930 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 6931 ptr_idxs = recv_buffer_idxs; 6932 for (i=0;i<n_recvs;i++) { 6933 if ((PetscInt)new_local_type_private != *ptr_idxs) { 6934 new_local_type_private = MATAIJ_PRIVATE; 6935 break; 6936 } 6937 ptr_idxs += olengths_idxs[i]; 6938 } 6939 switch (new_local_type_private) { 6940 case MATDENSE_PRIVATE: 6941 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 6942 new_local_type = MATSEQAIJ; 6943 bs = 1; 6944 } else { /* if I receive only 1 dense matrix */ 6945 new_local_type = MATSEQDENSE; 6946 bs = 1; 6947 } 6948 break; 6949 case MATAIJ_PRIVATE: 6950 new_local_type = MATSEQAIJ; 6951 bs = 1; 6952 break; 6953 case MATBAIJ_PRIVATE: 6954 new_local_type = MATSEQBAIJ; 6955 break; 6956 case MATSBAIJ_PRIVATE: 6957 new_local_type = MATSEQSBAIJ; 6958 break; 6959 default: 6960 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 6961 break; 6962 } 6963 } else { /* by default, new_local_type is seqdense */ 6964 new_local_type = MATSEQDENSE; 6965 bs = 1; 6966 } 6967 6968 /* create MATIS object if needed */ 6969 if (!reuse) { 6970 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 6971 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6972 } else { 6973 /* it also destroys the local matrices */ 6974 if (*mat_n) { 6975 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 6976 } else { /* this is a fake object */ 6977 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6978 } 6979 } 6980 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 6981 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 6982 6983 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6984 6985 /* Global to local map of received indices */ 6986 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 6987 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 6988 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 6989 6990 /* restore attributes -> type of incoming data and its size */ 6991 buf_size_idxs = 0; 6992 for (i=0;i<n_recvs;i++) { 6993 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 6994 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 6995 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6996 } 6997 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 6998 6999 /* set preallocation */ 7000 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7001 if (!newisdense) { 7002 PetscInt *new_local_nnz=0; 7003 7004 ptr_idxs = recv_buffer_idxs_local; 7005 if (n_recvs) { 7006 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7007 } 7008 for (i=0;i<n_recvs;i++) { 7009 PetscInt j; 7010 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7011 for (j=0;j<*(ptr_idxs+1);j++) { 7012 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7013 } 7014 } else { 7015 /* TODO */ 7016 } 7017 ptr_idxs += olengths_idxs[i]; 7018 } 7019 if (new_local_nnz) { 7020 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7021 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7022 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7023 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7024 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7025 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7026 } else { 7027 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7028 } 7029 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7030 } else { 7031 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7032 } 7033 7034 /* set values */ 7035 ptr_vals = recv_buffer_vals; 7036 ptr_idxs = recv_buffer_idxs_local; 7037 for (i=0;i<n_recvs;i++) { 7038 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7039 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7040 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7041 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7042 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7043 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7044 } else { 7045 /* TODO */ 7046 } 7047 ptr_idxs += olengths_idxs[i]; 7048 ptr_vals += olengths_vals[i]; 7049 } 7050 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7051 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7052 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7053 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7054 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7055 7056 #if 0 7057 if (!restrict_comm) { /* check */ 7058 Vec lvec,rvec; 7059 PetscReal infty_error; 7060 7061 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7062 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7063 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7064 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7065 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7066 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7067 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7068 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7069 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7070 } 7071 #endif 7072 7073 /* assemble new additional is (if any) */ 7074 if (nis) { 7075 PetscInt **temp_idxs,*count_is,j,psum; 7076 7077 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7078 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7079 ptr_idxs = recv_buffer_idxs_is; 7080 psum = 0; 7081 for (i=0;i<n_recvs;i++) { 7082 for (j=0;j<nis;j++) { 7083 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7084 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7085 psum += plen; 7086 ptr_idxs += plen+1; /* shift pointer to received data */ 7087 } 7088 } 7089 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7090 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7091 for (i=1;i<nis;i++) { 7092 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7093 } 7094 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7095 ptr_idxs = recv_buffer_idxs_is; 7096 for (i=0;i<n_recvs;i++) { 7097 for (j=0;j<nis;j++) { 7098 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7099 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7100 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7101 ptr_idxs += plen+1; /* shift pointer to received data */ 7102 } 7103 } 7104 for (i=0;i<nis;i++) { 7105 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7106 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7107 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7108 } 7109 ierr = PetscFree(count_is);CHKERRQ(ierr); 7110 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7111 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7112 } 7113 /* free workspace */ 7114 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7115 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7116 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7117 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7118 if (isdense) { 7119 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7120 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7121 } else { 7122 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7123 } 7124 if (nis) { 7125 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7126 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7127 } 7128 7129 if (nvecs) { 7130 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7131 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7132 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7133 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7134 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7135 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7136 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7137 /* set values */ 7138 ptr_vals = recv_buffer_vecs; 7139 ptr_idxs = recv_buffer_idxs_local; 7140 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7141 for (i=0;i<n_recvs;i++) { 7142 PetscInt j; 7143 for (j=0;j<*(ptr_idxs+1);j++) { 7144 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7145 } 7146 ptr_idxs += olengths_idxs[i]; 7147 ptr_vals += olengths_idxs[i]-2; 7148 } 7149 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7150 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7151 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7152 } 7153 7154 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7155 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7156 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7157 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7158 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7159 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7160 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7161 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7162 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7163 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7164 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7165 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7166 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7167 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7168 ierr = PetscFree(onodes);CHKERRQ(ierr); 7169 if (nis) { 7170 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7171 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7172 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7173 } 7174 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7175 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7176 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7177 for (i=0;i<nis;i++) { 7178 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7179 } 7180 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7181 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7182 } 7183 *mat_n = NULL; 7184 } 7185 PetscFunctionReturn(0); 7186 } 7187 7188 /* temporary hack into ksp private data structure */ 7189 #include <petsc/private/kspimpl.h> 7190 7191 #undef __FUNCT__ 7192 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 7193 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7194 { 7195 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7196 PC_IS *pcis = (PC_IS*)pc->data; 7197 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7198 Mat coarsedivudotp = NULL; 7199 Mat coarseG,t_coarse_mat_is; 7200 MatNullSpace CoarseNullSpace = NULL; 7201 ISLocalToGlobalMapping coarse_islg; 7202 IS coarse_is,*isarray; 7203 PetscInt i,im_active=-1,active_procs=-1; 7204 PetscInt nis,nisdofs,nisneu,nisvert; 7205 PC pc_temp; 7206 PCType coarse_pc_type; 7207 KSPType coarse_ksp_type; 7208 PetscBool multilevel_requested,multilevel_allowed; 7209 PetscBool isredundant,isbddc,isnn,coarse_reuse; 7210 PetscInt ncoarse,nedcfield; 7211 PetscBool compute_vecs = PETSC_FALSE; 7212 PetscScalar *array; 7213 MatReuse coarse_mat_reuse; 7214 PetscBool restr, full_restr, have_void; 7215 PetscErrorCode ierr; 7216 7217 PetscFunctionBegin; 7218 /* Assign global numbering to coarse dofs */ 7219 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7220 PetscInt ocoarse_size; 7221 compute_vecs = PETSC_TRUE; 7222 ocoarse_size = pcbddc->coarse_size; 7223 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7224 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7225 /* see if we can avoid some work */ 7226 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7227 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7228 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7229 PC pc; 7230 PetscBool isbddc; 7231 7232 /* temporary workaround since PCBDDC does not have a reset method so far */ 7233 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 7234 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 7235 if (isbddc) { 7236 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 7237 } else { 7238 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7239 } 7240 coarse_reuse = PETSC_FALSE; 7241 } else { /* we can safely reuse already computed coarse matrix */ 7242 coarse_reuse = PETSC_TRUE; 7243 } 7244 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7245 coarse_reuse = PETSC_FALSE; 7246 } 7247 /* reset any subassembling information */ 7248 if (!coarse_reuse || pcbddc->recompute_topography) { 7249 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7250 } 7251 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7252 coarse_reuse = PETSC_TRUE; 7253 } 7254 /* assemble coarse matrix */ 7255 if (coarse_reuse && pcbddc->coarse_ksp) { 7256 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7257 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7258 coarse_mat_reuse = MAT_REUSE_MATRIX; 7259 } else { 7260 coarse_mat = NULL; 7261 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7262 } 7263 7264 /* creates temporary l2gmap and IS for coarse indexes */ 7265 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7266 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7267 7268 /* creates temporary MATIS object for coarse matrix */ 7269 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7270 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7271 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7272 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7273 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7274 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7275 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7276 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7277 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7278 7279 /* count "active" (i.e. with positive local size) and "void" processes */ 7280 im_active = !!(pcis->n); 7281 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7282 7283 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7284 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7285 /* full_restr : just use the receivers from the subassembling pattern */ 7286 coarse_mat_is = NULL; 7287 multilevel_allowed = PETSC_FALSE; 7288 multilevel_requested = PETSC_FALSE; 7289 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7290 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7291 if (multilevel_requested) { 7292 ncoarse = active_procs/pcbddc->coarsening_ratio; 7293 restr = PETSC_FALSE; 7294 full_restr = PETSC_FALSE; 7295 } else { 7296 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7297 restr = PETSC_TRUE; 7298 full_restr = PETSC_TRUE; 7299 } 7300 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7301 ncoarse = PetscMax(1,ncoarse); 7302 if (!pcbddc->coarse_subassembling) { 7303 if (pcbddc->coarsening_ratio > 1) { 7304 if (multilevel_requested) { 7305 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7306 } else { 7307 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7308 } 7309 } else { 7310 PetscMPIInt size,rank; 7311 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 7312 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7313 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 7314 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7315 } 7316 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7317 PetscInt psum; 7318 PetscMPIInt size; 7319 if (pcbddc->coarse_ksp) psum = 1; 7320 else psum = 0; 7321 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7322 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 7323 if (ncoarse < size) have_void = PETSC_TRUE; 7324 } 7325 /* determine if we can go multilevel */ 7326 if (multilevel_requested) { 7327 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7328 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7329 } 7330 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7331 7332 /* dump subassembling pattern */ 7333 if (pcbddc->dbg_flag && multilevel_allowed) { 7334 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7335 } 7336 7337 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7338 nedcfield = -1; 7339 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7340 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7341 const PetscInt *idxs; 7342 ISLocalToGlobalMapping tmap; 7343 7344 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7345 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7346 /* allocate space for temporary storage */ 7347 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7348 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7349 /* allocate for IS array */ 7350 nisdofs = pcbddc->n_ISForDofsLocal; 7351 if (pcbddc->nedclocal) { 7352 if (pcbddc->nedfield > -1) { 7353 nedcfield = pcbddc->nedfield; 7354 } else { 7355 nedcfield = 0; 7356 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7357 nisdofs = 1; 7358 } 7359 } 7360 nisneu = !!pcbddc->NeumannBoundariesLocal; 7361 nisvert = 0; /* nisvert is not used */ 7362 nis = nisdofs + nisneu + nisvert; 7363 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7364 /* dofs splitting */ 7365 for (i=0;i<nisdofs;i++) { 7366 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7367 if (nedcfield != i) { 7368 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7369 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7370 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7371 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7372 } else { 7373 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7374 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7375 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7376 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7377 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7378 } 7379 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7380 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7381 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7382 } 7383 /* neumann boundaries */ 7384 if (pcbddc->NeumannBoundariesLocal) { 7385 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7386 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7387 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7388 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7389 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7390 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7391 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7392 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7393 } 7394 /* free memory */ 7395 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7396 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7397 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7398 } else { 7399 nis = 0; 7400 nisdofs = 0; 7401 nisneu = 0; 7402 nisvert = 0; 7403 isarray = NULL; 7404 } 7405 /* destroy no longer needed map */ 7406 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7407 7408 /* subassemble */ 7409 if (multilevel_allowed) { 7410 Vec vp[1]; 7411 PetscInt nvecs = 0; 7412 PetscBool reuse,reuser; 7413 7414 if (coarse_mat) reuse = PETSC_TRUE; 7415 else reuse = PETSC_FALSE; 7416 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7417 vp[0] = NULL; 7418 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7419 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7420 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7421 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7422 nvecs = 1; 7423 7424 if (pcbddc->divudotp) { 7425 Mat B,loc_divudotp; 7426 Vec v,p; 7427 IS dummy; 7428 PetscInt np; 7429 7430 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7431 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7432 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7433 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7434 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7435 ierr = VecSet(p,1.);CHKERRQ(ierr); 7436 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7437 ierr = VecDestroy(&p);CHKERRQ(ierr); 7438 ierr = MatDestroy(&B);CHKERRQ(ierr); 7439 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7440 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7441 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7442 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7443 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7444 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7445 ierr = VecDestroy(&v);CHKERRQ(ierr); 7446 } 7447 } 7448 if (reuser) { 7449 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7450 } else { 7451 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7452 } 7453 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7454 PetscScalar *arraym,*arrayv; 7455 PetscInt nl; 7456 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7457 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7458 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7459 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7460 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7461 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7462 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7463 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7464 } else { 7465 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7466 } 7467 } else { 7468 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7469 } 7470 if (coarse_mat_is || coarse_mat) { 7471 PetscMPIInt size; 7472 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7473 if (!multilevel_allowed) { 7474 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7475 } else { 7476 Mat A; 7477 7478 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7479 if (coarse_mat_is) { 7480 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7481 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7482 coarse_mat = coarse_mat_is; 7483 } 7484 /* be sure we don't have MatSeqDENSE as local mat */ 7485 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7486 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7487 } 7488 } 7489 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7490 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7491 7492 /* create local to global scatters for coarse problem */ 7493 if (compute_vecs) { 7494 PetscInt lrows; 7495 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7496 if (coarse_mat) { 7497 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7498 } else { 7499 lrows = 0; 7500 } 7501 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7502 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7503 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7504 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7505 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7506 } 7507 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7508 7509 /* set defaults for coarse KSP and PC */ 7510 if (multilevel_allowed) { 7511 coarse_ksp_type = KSPRICHARDSON; 7512 coarse_pc_type = PCBDDC; 7513 } else { 7514 coarse_ksp_type = KSPPREONLY; 7515 coarse_pc_type = PCREDUNDANT; 7516 } 7517 7518 /* print some info if requested */ 7519 if (pcbddc->dbg_flag) { 7520 if (!multilevel_allowed) { 7521 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7522 if (multilevel_requested) { 7523 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7524 } else if (pcbddc->max_levels) { 7525 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7526 } 7527 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7528 } 7529 } 7530 7531 /* communicate coarse discrete gradient */ 7532 coarseG = NULL; 7533 if (pcbddc->nedcG && multilevel_allowed) { 7534 MPI_Comm ccomm; 7535 if (coarse_mat) { 7536 ccomm = PetscObjectComm((PetscObject)coarse_mat); 7537 } else { 7538 ccomm = MPI_COMM_NULL; 7539 } 7540 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 7541 } 7542 7543 /* create the coarse KSP object only once with defaults */ 7544 if (coarse_mat) { 7545 PetscViewer dbg_viewer = NULL; 7546 if (pcbddc->dbg_flag) { 7547 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7548 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7549 } 7550 if (!pcbddc->coarse_ksp) { 7551 char prefix[256],str_level[16]; 7552 size_t len; 7553 7554 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7555 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7556 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7557 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7558 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7559 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7560 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7561 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7562 /* TODO is this logic correct? should check for coarse_mat type */ 7563 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7564 /* prefix */ 7565 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7566 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7567 if (!pcbddc->current_level) { 7568 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7569 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7570 } else { 7571 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7572 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7573 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7574 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7575 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7576 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7577 } 7578 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7579 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7580 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7581 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7582 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7583 /* allow user customization */ 7584 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7585 } 7586 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7587 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7588 if (nisdofs) { 7589 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7590 for (i=0;i<nisdofs;i++) { 7591 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7592 } 7593 } 7594 if (nisneu) { 7595 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7596 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7597 } 7598 if (nisvert) { 7599 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7600 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7601 } 7602 if (coarseG) { 7603 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 7604 } 7605 7606 /* get some info after set from options */ 7607 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7608 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7609 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7610 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 7611 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7612 isbddc = PETSC_FALSE; 7613 } 7614 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7615 if (isredundant) { 7616 KSP inner_ksp; 7617 PC inner_pc; 7618 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7619 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7620 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7621 } 7622 7623 /* parameters which miss an API */ 7624 if (isbddc) { 7625 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7626 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7627 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7628 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7629 if (pcbddc_coarse->benign_saddle_point) { 7630 Mat coarsedivudotp_is; 7631 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7632 IS row,col; 7633 const PetscInt *gidxs; 7634 PetscInt n,st,M,N; 7635 7636 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7637 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7638 st = st-n; 7639 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7640 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7641 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7642 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7643 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7644 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7645 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7646 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7647 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7648 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7649 ierr = ISDestroy(&row);CHKERRQ(ierr); 7650 ierr = ISDestroy(&col);CHKERRQ(ierr); 7651 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7652 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7653 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7654 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7655 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7656 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7657 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7658 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7659 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7660 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7661 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7662 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7663 } 7664 } 7665 7666 /* propagate symmetry info of coarse matrix */ 7667 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7668 if (pc->pmat->symmetric_set) { 7669 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7670 } 7671 if (pc->pmat->hermitian_set) { 7672 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7673 } 7674 if (pc->pmat->spd_set) { 7675 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7676 } 7677 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7678 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7679 } 7680 /* set operators */ 7681 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7682 if (pcbddc->dbg_flag) { 7683 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7684 } 7685 } 7686 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 7687 ierr = PetscFree(isarray);CHKERRQ(ierr); 7688 #if 0 7689 { 7690 PetscViewer viewer; 7691 char filename[256]; 7692 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7693 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7694 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7695 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7696 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7697 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7698 } 7699 #endif 7700 7701 if (pcbddc->coarse_ksp) { 7702 Vec crhs,csol; 7703 7704 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7705 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7706 if (!csol) { 7707 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7708 } 7709 if (!crhs) { 7710 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7711 } 7712 } 7713 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7714 7715 /* compute null space for coarse solver if the benign trick has been requested */ 7716 if (pcbddc->benign_null) { 7717 7718 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7719 for (i=0;i<pcbddc->benign_n;i++) { 7720 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7721 } 7722 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7723 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7724 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7725 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7726 if (coarse_mat) { 7727 Vec nullv; 7728 PetscScalar *array,*array2; 7729 PetscInt nl; 7730 7731 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7732 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7733 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7734 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7735 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7736 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7737 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7738 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7739 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7740 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7741 } 7742 } 7743 7744 if (pcbddc->coarse_ksp) { 7745 PetscBool ispreonly; 7746 7747 if (CoarseNullSpace) { 7748 PetscBool isnull; 7749 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7750 if (isnull) { 7751 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7752 } 7753 /* TODO: add local nullspaces (if any) */ 7754 } 7755 /* setup coarse ksp */ 7756 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7757 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7758 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7759 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7760 KSP check_ksp; 7761 KSPType check_ksp_type; 7762 PC check_pc; 7763 Vec check_vec,coarse_vec; 7764 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7765 PetscInt its; 7766 PetscBool compute_eigs; 7767 PetscReal *eigs_r,*eigs_c; 7768 PetscInt neigs; 7769 const char *prefix; 7770 7771 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7772 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7773 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7774 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7775 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7776 /* prevent from setup unneeded object */ 7777 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7778 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7779 if (ispreonly) { 7780 check_ksp_type = KSPPREONLY; 7781 compute_eigs = PETSC_FALSE; 7782 } else { 7783 check_ksp_type = KSPGMRES; 7784 compute_eigs = PETSC_TRUE; 7785 } 7786 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7787 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7788 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7789 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7790 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7791 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7792 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7793 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7794 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7795 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7796 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7797 /* create random vec */ 7798 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7799 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7800 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7801 /* solve coarse problem */ 7802 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7803 /* set eigenvalue estimation if preonly has not been requested */ 7804 if (compute_eigs) { 7805 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7806 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7807 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7808 if (neigs) { 7809 lambda_max = eigs_r[neigs-1]; 7810 lambda_min = eigs_r[0]; 7811 if (pcbddc->use_coarse_estimates) { 7812 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7813 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7814 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7815 } 7816 } 7817 } 7818 } 7819 7820 /* check coarse problem residual error */ 7821 if (pcbddc->dbg_flag) { 7822 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7823 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7824 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7825 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7826 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7827 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7828 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7829 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7830 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7831 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7832 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7833 if (CoarseNullSpace) { 7834 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7835 } 7836 if (compute_eigs) { 7837 PetscReal lambda_max_s,lambda_min_s; 7838 KSPConvergedReason reason; 7839 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7840 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7841 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 7842 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7843 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7844 for (i=0;i<neigs;i++) { 7845 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7846 } 7847 } 7848 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7849 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7850 } 7851 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7852 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7853 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7854 if (compute_eigs) { 7855 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7856 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7857 } 7858 } 7859 } 7860 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7861 /* print additional info */ 7862 if (pcbddc->dbg_flag) { 7863 /* waits until all processes reaches this point */ 7864 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7865 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7866 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7867 } 7868 7869 /* free memory */ 7870 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7871 PetscFunctionReturn(0); 7872 } 7873 7874 #undef __FUNCT__ 7875 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 7876 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7877 { 7878 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7879 PC_IS* pcis = (PC_IS*)pc->data; 7880 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7881 IS subset,subset_mult,subset_n; 7882 PetscInt local_size,coarse_size=0; 7883 PetscInt *local_primal_indices=NULL; 7884 const PetscInt *t_local_primal_indices; 7885 PetscErrorCode ierr; 7886 7887 PetscFunctionBegin; 7888 /* Compute global number of coarse dofs */ 7889 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7890 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7891 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7892 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7893 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7894 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7895 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7896 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7897 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7898 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7899 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7900 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7901 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7902 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7903 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7904 7905 /* check numbering */ 7906 if (pcbddc->dbg_flag) { 7907 PetscScalar coarsesum,*array,*array2; 7908 PetscInt i; 7909 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7910 7911 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7912 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7913 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7914 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7915 /* counter */ 7916 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7917 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7918 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7919 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7920 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7921 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7922 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7923 for (i=0;i<pcbddc->local_primal_size;i++) { 7924 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7925 } 7926 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7927 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7928 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7929 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7930 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7931 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7932 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7933 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7934 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7935 for (i=0;i<pcis->n;i++) { 7936 if (array[i] != 0.0 && array[i] != array2[i]) { 7937 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 7938 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 7939 set_error = PETSC_TRUE; 7940 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 7941 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 7942 } 7943 } 7944 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7945 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7946 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7947 for (i=0;i<pcis->n;i++) { 7948 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 7949 } 7950 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7951 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7952 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7953 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7954 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 7955 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 7956 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 7957 PetscInt *gidxs; 7958 7959 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 7960 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 7961 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 7962 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7963 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 7964 for (i=0;i<pcbddc->local_primal_size;i++) { 7965 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 7966 } 7967 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7968 ierr = PetscFree(gidxs);CHKERRQ(ierr); 7969 } 7970 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7971 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7972 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 7973 } 7974 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 7975 /* get back data */ 7976 *coarse_size_n = coarse_size; 7977 *local_primal_indices_n = local_primal_indices; 7978 PetscFunctionReturn(0); 7979 } 7980 7981 #undef __FUNCT__ 7982 #define __FUNCT__ "PCBDDCGlobalToLocal" 7983 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 7984 { 7985 IS localis_t; 7986 PetscInt i,lsize,*idxs,n; 7987 PetscScalar *vals; 7988 PetscErrorCode ierr; 7989 7990 PetscFunctionBegin; 7991 /* get indices in local ordering exploiting local to global map */ 7992 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 7993 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 7994 for (i=0;i<lsize;i++) vals[i] = 1.0; 7995 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7996 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 7997 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 7998 if (idxs) { /* multilevel guard */ 7999 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8000 } 8001 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8002 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8003 ierr = PetscFree(vals);CHKERRQ(ierr); 8004 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8005 /* now compute set in local ordering */ 8006 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8007 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8008 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8009 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8010 for (i=0,lsize=0;i<n;i++) { 8011 if (PetscRealPart(vals[i]) > 0.5) { 8012 lsize++; 8013 } 8014 } 8015 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8016 for (i=0,lsize=0;i<n;i++) { 8017 if (PetscRealPart(vals[i]) > 0.5) { 8018 idxs[lsize++] = i; 8019 } 8020 } 8021 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8022 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8023 *localis = localis_t; 8024 PetscFunctionReturn(0); 8025 } 8026 8027 #undef __FUNCT__ 8028 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 8029 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8030 { 8031 PC_IS *pcis=(PC_IS*)pc->data; 8032 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8033 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8034 Mat S_j; 8035 PetscInt *used_xadj,*used_adjncy; 8036 PetscBool free_used_adj; 8037 PetscErrorCode ierr; 8038 8039 PetscFunctionBegin; 8040 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8041 free_used_adj = PETSC_FALSE; 8042 if (pcbddc->sub_schurs_layers == -1) { 8043 used_xadj = NULL; 8044 used_adjncy = NULL; 8045 } else { 8046 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8047 used_xadj = pcbddc->mat_graph->xadj; 8048 used_adjncy = pcbddc->mat_graph->adjncy; 8049 } else if (pcbddc->computed_rowadj) { 8050 used_xadj = pcbddc->mat_graph->xadj; 8051 used_adjncy = pcbddc->mat_graph->adjncy; 8052 } else { 8053 PetscBool flg_row=PETSC_FALSE; 8054 const PetscInt *xadj,*adjncy; 8055 PetscInt nvtxs; 8056 8057 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8058 if (flg_row) { 8059 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8060 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8061 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8062 free_used_adj = PETSC_TRUE; 8063 } else { 8064 pcbddc->sub_schurs_layers = -1; 8065 used_xadj = NULL; 8066 used_adjncy = NULL; 8067 } 8068 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8069 } 8070 } 8071 8072 /* setup sub_schurs data */ 8073 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8074 if (!sub_schurs->schur_explicit) { 8075 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8076 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8077 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8078 } else { 8079 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8080 PetscBool isseqaij,need_change = PETSC_FALSE; 8081 PetscInt benign_n; 8082 Mat change = NULL; 8083 Vec scaling = NULL; 8084 IS change_primal = NULL; 8085 8086 if (!pcbddc->use_vertices && reuse_solvers) { 8087 PetscInt n_vertices; 8088 8089 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8090 reuse_solvers = (PetscBool)!n_vertices; 8091 } 8092 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8093 if (!isseqaij) { 8094 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8095 if (matis->A == pcbddc->local_mat) { 8096 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8097 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8098 } else { 8099 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8100 } 8101 } 8102 if (!pcbddc->benign_change_explicit) { 8103 benign_n = pcbddc->benign_n; 8104 } else { 8105 benign_n = 0; 8106 } 8107 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8108 We need a global reduction to avoid possible deadlocks. 8109 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8110 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8111 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8112 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8113 need_change = (PetscBool)(!need_change); 8114 } 8115 /* If the user defines additional constraints, we import them here. 8116 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8117 if (need_change) { 8118 PC_IS *pcisf; 8119 PC_BDDC *pcbddcf; 8120 PC pcf; 8121 8122 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8123 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8124 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8125 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8126 /* hacks */ 8127 pcisf = (PC_IS*)pcf->data; 8128 pcisf->is_B_local = pcis->is_B_local; 8129 pcisf->vec1_N = pcis->vec1_N; 8130 pcisf->BtoNmap = pcis->BtoNmap; 8131 pcisf->n = pcis->n; 8132 pcisf->n_B = pcis->n_B; 8133 pcbddcf = (PC_BDDC*)pcf->data; 8134 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8135 pcbddcf->mat_graph = pcbddc->mat_graph; 8136 pcbddcf->use_faces = PETSC_TRUE; 8137 pcbddcf->use_change_of_basis = PETSC_TRUE; 8138 pcbddcf->use_change_on_faces = PETSC_TRUE; 8139 pcbddcf->use_qr_single = PETSC_TRUE; 8140 pcbddcf->fake_change = PETSC_TRUE; 8141 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8142 /* store information on primal vertices and change of basis (in local numbering) */ 8143 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8144 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8145 change = pcbddcf->ConstraintMatrix; 8146 pcbddcf->ConstraintMatrix = NULL; 8147 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8148 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8149 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8150 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8151 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8152 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8153 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8154 pcf->ops->destroy = NULL; 8155 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8156 } 8157 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8158 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8159 ierr = MatDestroy(&change);CHKERRQ(ierr); 8160 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8161 } 8162 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8163 8164 /* free adjacency */ 8165 if (free_used_adj) { 8166 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8167 } 8168 PetscFunctionReturn(0); 8169 } 8170 8171 #undef __FUNCT__ 8172 #define __FUNCT__ "PCBDDCInitSubSchurs" 8173 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8174 { 8175 PC_IS *pcis=(PC_IS*)pc->data; 8176 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8177 PCBDDCGraph graph; 8178 PetscErrorCode ierr; 8179 8180 PetscFunctionBegin; 8181 /* attach interface graph for determining subsets */ 8182 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8183 IS verticesIS,verticescomm; 8184 PetscInt vsize,*idxs; 8185 8186 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8187 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8188 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8189 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8190 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8191 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8192 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8193 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8194 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8195 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8196 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8197 } else { 8198 graph = pcbddc->mat_graph; 8199 } 8200 /* print some info */ 8201 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8202 IS vertices; 8203 PetscInt nv,nedges,nfaces; 8204 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8205 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8206 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8207 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8208 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8209 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8210 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8211 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8212 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8213 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8214 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8215 } 8216 8217 /* sub_schurs init */ 8218 if (!pcbddc->sub_schurs) { 8219 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8220 } 8221 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8222 8223 /* free graph struct */ 8224 if (pcbddc->sub_schurs_rebuild) { 8225 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8226 } 8227 PetscFunctionReturn(0); 8228 } 8229 8230 #undef __FUNCT__ 8231 #define __FUNCT__ "PCBDDCCheckOperator" 8232 PetscErrorCode PCBDDCCheckOperator(PC pc) 8233 { 8234 PC_IS *pcis=(PC_IS*)pc->data; 8235 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8236 PetscErrorCode ierr; 8237 8238 PetscFunctionBegin; 8239 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8240 IS zerodiag = NULL; 8241 Mat S_j,B0_B=NULL; 8242 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8243 PetscScalar *p0_check,*array,*array2; 8244 PetscReal norm; 8245 PetscInt i; 8246 8247 /* B0 and B0_B */ 8248 if (zerodiag) { 8249 IS dummy; 8250 8251 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8252 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8253 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8254 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8255 } 8256 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8257 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8258 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8259 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8260 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8261 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8262 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8263 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8264 /* S_j */ 8265 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8266 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8267 8268 /* mimic vector in \widetilde{W}_\Gamma */ 8269 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8270 /* continuous in primal space */ 8271 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8272 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8273 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8274 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8275 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8276 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8277 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8278 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8279 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8280 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8281 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8282 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8283 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8284 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8285 8286 /* assemble rhs for coarse problem */ 8287 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8288 /* local with Schur */ 8289 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8290 if (zerodiag) { 8291 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8292 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8293 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8294 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8295 } 8296 /* sum on primal nodes the local contributions */ 8297 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8298 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8299 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8300 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8301 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8302 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8303 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8304 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8305 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8306 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8307 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8308 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8309 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8310 /* scale primal nodes (BDDC sums contibutions) */ 8311 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8312 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8313 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8314 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8315 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8316 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8317 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8318 /* global: \widetilde{B0}_B w_\Gamma */ 8319 if (zerodiag) { 8320 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8321 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8322 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8323 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8324 } 8325 /* BDDC */ 8326 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8327 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8328 8329 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8330 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8331 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8332 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8333 for (i=0;i<pcbddc->benign_n;i++) { 8334 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8335 } 8336 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8337 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8338 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8339 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8340 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8341 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8342 } 8343 PetscFunctionReturn(0); 8344 } 8345 8346 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8347 #undef __FUNCT__ 8348 #define __FUNCT__ "MatMPIAIJRestrict" 8349 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8350 { 8351 Mat At; 8352 IS rows; 8353 PetscInt rst,ren; 8354 PetscErrorCode ierr; 8355 PetscLayout rmap; 8356 8357 PetscFunctionBegin; 8358 rst = ren = 0; 8359 if (ccomm != MPI_COMM_NULL) { 8360 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8361 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8362 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8363 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8364 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8365 } 8366 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8367 ierr = MatGetSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8368 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8369 8370 if (ccomm != MPI_COMM_NULL) { 8371 Mat_MPIAIJ *a,*b; 8372 IS from,to; 8373 Vec gvec; 8374 PetscInt lsize; 8375 8376 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8377 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8378 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8379 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8380 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8381 a = (Mat_MPIAIJ*)At->data; 8382 b = (Mat_MPIAIJ*)(*B)->data; 8383 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8384 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8385 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8386 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8387 b->A = a->A; 8388 b->B = a->B; 8389 8390 b->donotstash = a->donotstash; 8391 b->roworiented = a->roworiented; 8392 b->rowindices = 0; 8393 b->rowvalues = 0; 8394 b->getrowactive = PETSC_FALSE; 8395 8396 (*B)->rmap = rmap; 8397 (*B)->factortype = A->factortype; 8398 (*B)->assembled = PETSC_TRUE; 8399 (*B)->insertmode = NOT_SET_VALUES; 8400 (*B)->preallocated = PETSC_TRUE; 8401 8402 if (a->colmap) { 8403 #if defined(PETSC_USE_CTABLE) 8404 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8405 #else 8406 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8407 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8408 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8409 #endif 8410 } else b->colmap = 0; 8411 if (a->garray) { 8412 PetscInt len; 8413 len = a->B->cmap->n; 8414 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8415 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8416 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8417 } else b->garray = 0; 8418 8419 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8420 b->lvec = a->lvec; 8421 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8422 8423 /* cannot use VecScatterCopy */ 8424 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8425 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8426 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8427 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8428 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8429 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8430 ierr = ISDestroy(&from);CHKERRQ(ierr); 8431 ierr = ISDestroy(&to);CHKERRQ(ierr); 8432 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8433 } 8434 ierr = MatDestroy(&At);CHKERRQ(ierr); 8435 PetscFunctionReturn(0); 8436 } 8437