1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 8 9 /* if range is true, it returns B s.t. span{B} = range(A) 10 if range is false, it returns B s.t. range(B) _|_ range(A) */ 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatDenseOrthogonalRangeOrComplement" 13 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 14 { 15 #if !defined(PETSC_USE_COMPLEX) 16 PetscScalar *uwork,*data,*U, ds = 0.; 17 PetscReal *sing; 18 PetscBLASInt bM,bN,lwork,lierr,di = 1; 19 PetscInt ulw,i,nr,nc,n; 20 PetscErrorCode ierr; 21 22 PetscFunctionBegin; 23 #if defined(PETSC_MISSING_LAPACK_GESVD) 24 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 25 #else 26 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 27 if (!nr || !nc) PetscFunctionReturn(0); 28 29 /* workspace */ 30 if (!work) { 31 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 32 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 33 } else { 34 ulw = lw; 35 uwork = work; 36 } 37 n = PetscMin(nr,nc); 38 if (!rwork) { 39 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 40 } else { 41 sing = rwork; 42 } 43 44 /* SVD */ 45 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 46 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 47 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 49 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 50 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 51 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 52 ierr = PetscFPTrapPop();CHKERRQ(ierr); 53 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 54 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 55 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 56 if (!rwork) { 57 ierr = PetscFree(sing);CHKERRQ(ierr); 58 } 59 if (!work) { 60 ierr = PetscFree(uwork);CHKERRQ(ierr); 61 } 62 /* create B */ 63 if (!range) { 64 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 65 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 66 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 67 } else { 68 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 69 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 70 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 71 } 72 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 73 ierr = PetscFree(U);CHKERRQ(ierr); 74 #endif 75 #else /* PETSC_USE_COMPLEX */ 76 PetscFunctionBegin; 77 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 78 #endif 79 PetscFunctionReturn(0); 80 } 81 82 /* TODO REMOVE */ 83 #if defined(PRINT_GDET) 84 static int inc = 0; 85 static int lev = 0; 86 #endif 87 88 #undef __FUNCT__ 89 #define __FUNCT__ "PCBDDCComputeNedelecChangeEdge" 90 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 91 { 92 PetscErrorCode ierr; 93 Mat GE,GEd; 94 PetscInt rsize,csize,esize; 95 PetscScalar *ptr; 96 97 PetscFunctionBegin; 98 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 99 if (!esize) PetscFunctionReturn(0); 100 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 101 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 102 103 /* gradients */ 104 ptr = work + 5*esize; 105 ierr = MatGetSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 107 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 108 ierr = MatDestroy(&GE);CHKERRQ(ierr); 109 110 /* constants */ 111 ptr += rsize*csize; 112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 113 ierr = MatGetSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 114 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 115 ierr = MatDestroy(&GE);CHKERRQ(ierr); 116 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 117 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 118 119 if (corners) { 120 Mat GEc; 121 PetscScalar *vals,v; 122 123 ierr = MatGetSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 124 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 125 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 126 /* v = PetscAbsScalar(vals[0]) */; 127 v = 1.; 128 cvals[0] = vals[0]/v; 129 cvals[1] = vals[1]/v; 130 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 131 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 132 #if defined(PRINT_GDET) 133 { 134 PetscViewer viewer; 135 char filename[256]; 136 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 137 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 138 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 139 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 140 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 141 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 142 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 143 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 144 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 145 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 146 } 147 #endif 148 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 149 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 150 } 151 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "PCBDDCNedelecSupport" 157 PetscErrorCode PCBDDCNedelecSupport(PC pc) 158 { 159 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 160 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 161 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 162 Vec tvec; 163 PetscSF sfv; 164 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 165 MPI_Comm comm; 166 IS lned,primals,allprimals,nedfieldlocal; 167 IS *eedges,*extrows,*extcols,*alleedges; 168 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 169 PetscScalar *vals,*work; 170 PetscReal *rwork; 171 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 172 PetscInt ne,nv,Lv,order,n,field; 173 PetscInt n_neigh,*neigh,*n_shared,**shared; 174 PetscInt i,j,extmem,cum,maxsize,nee; 175 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 176 PetscInt *sfvleaves,*sfvroots; 177 PetscInt *corners,*cedges; 178 PetscInt *ecount,**eneighs,*vcount,**vneighs; 179 #if defined(PETSC_USE_DEBUG) 180 PetscInt *emarks; 181 #endif 182 PetscBool print,eerr,done,lrc[2],conforming,global; 183 PetscErrorCode ierr; 184 185 PetscFunctionBegin; 186 /* test variable order code and print debug info TODO: to be removed */ 187 print = PETSC_FALSE; 188 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_print_nedelec",&print,NULL);CHKERRQ(ierr); 189 ierr = PetscOptionsGetInt(NULL,NULL,"-pc_bddc_nedelec_order",&pcbddc->nedorder,NULL);CHKERRQ(ierr); 190 191 /* Return to caller if there are no edges in the decomposition */ 192 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 193 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 194 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 195 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 196 lrc[0] = PETSC_FALSE; 197 for (i=0;i<n;i++) { 198 if (PetscRealPart(vals[i]) > 2.) { 199 lrc[0] = PETSC_TRUE; 200 break; 201 } 202 } 203 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 204 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 205 if (!lrc[1]) PetscFunctionReturn(0); 206 207 /* If the discrete gradient is defined for a subset of dofs and global is true, 208 it assumes G is given in global ordering for all the dofs. 209 Otherwise, the ordering is global for the Nedelec field */ 210 order = pcbddc->nedorder; 211 conforming = pcbddc->conforming; 212 field = pcbddc->nedfield; 213 global = pcbddc->nedglobal; 214 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 215 if (pcbddc->n_ISForDofsLocal && field > -1) { 216 PetscBool setprimal = PETSC_FALSE; 217 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_nedelec_field_primal",&setprimal,NULL);CHKERRQ(ierr); 218 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 219 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 220 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 221 if (setprimal) { 222 IS enedfieldlocal; 223 PetscInt *eidxs; 224 225 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 226 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 227 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 228 for (i=0,cum=0;i<ne;i++) { 229 if (PetscRealPart(vals[idxs[i]]) > 2.) { 230 eidxs[cum++] = idxs[i]; 231 } 232 } 233 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 234 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 235 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 236 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 237 ierr = PetscFree(eidxs);CHKERRQ(ierr); 238 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 239 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 240 PetscFunctionReturn(0); 241 } 242 } else if (!pcbddc->n_ISForDofsLocal) { 243 PetscBool testnedfield = PETSC_FALSE; 244 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_nedelec_field",&testnedfield,NULL);CHKERRQ(ierr); 245 if (!testnedfield) { 246 ne = n; 247 nedfieldlocal = NULL; 248 } else { 249 /* ierr = ISCreateStride(comm,n,0,1,&nedfieldlocal);CHKERRQ(ierr); */ 250 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 251 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 252 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 253 for (i=0;i<n;i++) matis->sf_leafdata[i] = 1; 254 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 255 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 256 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 257 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 258 for (i=0,cum=0;i<n;i++) { 259 if (matis->sf_leafdata[i] > 1) { 260 matis->sf_leafdata[cum++] = i; 261 } 262 } 263 ierr = ISCreateGeneral(comm,cum,matis->sf_leafdata,PETSC_COPY_VALUES,&nedfieldlocal);CHKERRQ(ierr); 264 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 265 } 266 global = PETSC_TRUE; 267 } else { 268 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 269 } 270 271 if (nedfieldlocal) { /* merge with previous code when testing is done */ 272 IS is; 273 274 /* need to map from the local Nedelec field to local numbering */ 275 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 276 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 277 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 278 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 279 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 280 if (global) { 281 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 282 el2g = al2g; 283 } else { 284 IS gis; 285 286 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 287 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 288 ierr = ISDestroy(&gis);CHKERRQ(ierr); 289 } 290 ierr = ISDestroy(&is);CHKERRQ(ierr); 291 } else { 292 /* restore default */ 293 pcbddc->nedfield = -1; 294 /* one ref for the destruction of al2g, one for el2g */ 295 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 296 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 297 el2g = al2g; 298 fl2g = NULL; 299 } 300 301 /* Sanity checks */ 302 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 303 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 304 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 305 306 /* Drop connections for interior edges */ 307 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 308 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 309 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 310 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 311 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 312 if (nedfieldlocal) { 313 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 314 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 315 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 316 } else { 317 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 318 } 319 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 320 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 321 if (global) { 322 PetscInt rst; 323 324 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 325 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 326 if (matis->sf_rootdata[i] < 2) { 327 matis->sf_rootdata[cum++] = i + rst; 328 } 329 } 330 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 331 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 332 } else { 333 PetscInt *tbz; 334 335 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 336 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 337 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 338 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 339 for (i=0,cum=0;i<ne;i++) 340 if (matis->sf_leafdata[idxs[i]] == 1) 341 tbz[cum++] = i; 342 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 343 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 344 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 345 ierr = PetscFree(tbz);CHKERRQ(ierr); 346 } 347 348 /* Extract subdomain relevant rows of G */ 349 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 350 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 351 ierr = MatGetSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 352 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 353 ierr = ISDestroy(&lned);CHKERRQ(ierr); 354 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 355 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 356 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 357 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 358 if (print) { 359 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 360 ierr = MatView(lG,NULL);CHKERRQ(ierr); 361 } 362 363 /* SF for nodal communications */ 364 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 365 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 366 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 367 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 368 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 369 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 370 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 371 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 372 ierr = PetscMalloc2(nv,&sfvleaves,Lv,&sfvroots);CHKERRQ(ierr); 373 374 /* Destroy temporary G created in MATIS format and modified G */ 375 ierr = MatDestroy(&G);CHKERRQ(ierr); 376 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 377 378 /* Save lG */ 379 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 380 381 /* Analyze the edge-nodes connections (duplicate lG) */ 382 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 383 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 384 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 385 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 386 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 387 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 388 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 389 /* need to import the boundary specification to ensure the 390 proper detection of coarse edges' endpoints */ 391 if (pcbddc->DirichletBoundariesLocal) { 392 IS is; 393 394 if (fl2g) { 395 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 396 } else { 397 is = pcbddc->DirichletBoundariesLocal; 398 } 399 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 400 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 401 for (i=0;i<cum;i++) { 402 if (idxs[i] >= 0) { 403 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 404 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 405 } 406 } 407 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 408 if (fl2g) { 409 ierr = ISDestroy(&is);CHKERRQ(ierr); 410 } 411 } 412 if (pcbddc->NeumannBoundariesLocal) { 413 IS is; 414 415 if (fl2g) { 416 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 417 } else { 418 is = pcbddc->NeumannBoundariesLocal; 419 } 420 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 421 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 422 for (i=0;i<cum;i++) { 423 if (idxs[i] >= 0) { 424 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 425 } 426 } 427 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 428 if (fl2g) { 429 ierr = ISDestroy(&is);CHKERRQ(ierr); 430 } 431 } 432 433 /* count neighs per dof */ 434 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 435 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 436 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 437 for (i=1,cum=0;i<n_neigh;i++) { 438 cum += n_shared[i]; 439 for (j=0;j<n_shared[i];j++) { 440 ecount[shared[i][j]]++; 441 } 442 } 443 if (ne) { 444 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 445 } 446 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 447 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 448 for (i=1;i<n_neigh;i++) { 449 for (j=0;j<n_shared[i];j++) { 450 PetscInt k = shared[i][j]; 451 eneighs[k][ecount[k]] = neigh[i]; 452 ecount[k]++; 453 } 454 } 455 for (i=0;i<ne;i++) { 456 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 457 } 458 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 459 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 460 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 461 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 462 for (i=1,cum=0;i<n_neigh;i++) { 463 cum += n_shared[i]; 464 for (j=0;j<n_shared[i];j++) { 465 vcount[shared[i][j]]++; 466 } 467 } 468 if (nv) { 469 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 470 } 471 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 472 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 473 for (i=1;i<n_neigh;i++) { 474 for (j=0;j<n_shared[i];j++) { 475 PetscInt k = shared[i][j]; 476 vneighs[k][vcount[k]] = neigh[i]; 477 vcount[k]++; 478 } 479 } 480 for (i=0;i<nv;i++) { 481 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 482 } 483 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 484 485 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 486 for proper detection of coarse edges' endpoints */ 487 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 488 for (i=0;i<ne;i++) { 489 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 490 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 491 } 492 } 493 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 494 if (!conforming) { 495 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 496 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 497 } 498 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 499 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 500 cum = 0; 501 for (i=0;i<ne;i++) { 502 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 503 if (!PetscBTLookup(btee,i)) { 504 marks[cum++] = i; 505 continue; 506 } 507 /* set badly connected edge dofs as primal */ 508 if (!conforming) { 509 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 510 marks[cum++] = i; 511 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 512 for (j=ii[i];j<ii[i+1];j++) { 513 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 514 } 515 } else { 516 /* every edge dofs should be connected trough a certain number of nodal dofs 517 to other edge dofs belonging to coarse edges 518 - at most 2 endpoints 519 - order-1 interior nodal dofs 520 - no undefined nodal dofs (nconn < order) 521 */ 522 PetscInt ends = 0,ints = 0, undef = 0; 523 for (j=ii[i];j<ii[i+1];j++) { 524 PetscInt v = jj[j],k; 525 PetscInt nconn = iit[v+1]-iit[v]; 526 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 527 if (nconn > order) ends++; 528 else if (nconn == order) ints++; 529 else undef++; 530 } 531 if (undef || ends > 2 || ints != order -1) { 532 marks[cum++] = i; 533 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 534 for (j=ii[i];j<ii[i+1];j++) { 535 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 536 } 537 } 538 } 539 } 540 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 541 if (!order && ii[i+1] != ii[i]) { 542 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 543 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 544 } 545 } 546 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 547 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 548 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 549 if (!conforming) { 550 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 551 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 552 } 553 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 554 555 /* identify splitpoints and corner candidates */ 556 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 557 if (print) { 558 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 559 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 560 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 561 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 562 } 563 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 564 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 565 for (i=0;i<nv;i++) { 566 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 567 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 568 if (!order) { /* variable order */ 569 PetscReal vorder = 0.; 570 571 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 572 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 573 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 574 ord = 1; 575 } 576 #if defined(PETSC_USE_DEBUG) 577 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 578 #endif 579 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 580 if (PetscBTLookup(btbd,jj[j])) { 581 bdir = PETSC_TRUE; 582 break; 583 } 584 if (vc != ecount[jj[j]]) { 585 sneighs = PETSC_FALSE; 586 } else { 587 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 588 for (k=0;k<vc;k++) { 589 if (vn[k] != en[k]) { 590 sneighs = PETSC_FALSE; 591 break; 592 } 593 } 594 } 595 } 596 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 597 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 598 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 599 } else if (test == ord) { 600 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 601 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 602 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 603 } else { 604 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 605 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 606 } 607 } 608 } 609 ierr = PetscFree(ecount);CHKERRQ(ierr); 610 ierr = PetscFree(vcount);CHKERRQ(ierr); 611 if (ne) { 612 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 613 } 614 if (nv) { 615 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 616 } 617 ierr = PetscFree(eneighs);CHKERRQ(ierr); 618 ierr = PetscFree(vneighs);CHKERRQ(ierr); 619 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 620 621 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 622 if (order != 1) { 623 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 624 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 625 for (i=0;i<nv;i++) { 626 if (PetscBTLookup(btvcand,i)) { 627 PetscBool found = PETSC_FALSE; 628 for (j=ii[i];j<ii[i+1] && !found;j++) { 629 PetscInt k,e = jj[j]; 630 if (PetscBTLookup(bte,e)) continue; 631 for (k=iit[e];k<iit[e+1];k++) { 632 PetscInt v = jjt[k]; 633 if (v != i && PetscBTLookup(btvcand,v)) { 634 found = PETSC_TRUE; 635 break; 636 } 637 } 638 } 639 if (!found) { 640 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 641 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 642 } else { 643 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 644 } 645 } 646 } 647 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 648 } 649 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 650 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 651 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 652 653 /* Get the local G^T explicitly */ 654 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 655 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 656 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 657 658 /* Mark interior nodal dofs */ 659 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 660 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 661 for (i=1;i<n_neigh;i++) { 662 for (j=0;j<n_shared[i];j++) { 663 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 664 } 665 } 666 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 667 668 /* communicate corners and splitpoints */ 669 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 670 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 671 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 672 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 673 674 if (print) { 675 IS tbz; 676 677 cum = 0; 678 for (i=0;i<nv;i++) 679 if (sfvleaves[i]) 680 vmarks[cum++] = i; 681 682 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 683 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 684 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 685 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 686 } 687 688 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 689 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 690 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 691 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 692 693 /* Zero rows of lGt corresponding to identified corners 694 and interior nodal dofs */ 695 cum = 0; 696 for (i=0;i<nv;i++) { 697 if (sfvleaves[i]) { 698 vmarks[cum++] = i; 699 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 700 } 701 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 702 } 703 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 704 if (print) { 705 IS tbz; 706 707 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 708 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 709 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 710 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 711 } 712 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 713 ierr = PetscFree(vmarks);CHKERRQ(ierr); 714 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 715 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 716 717 /* Recompute G */ 718 ierr = MatDestroy(&lG);CHKERRQ(ierr); 719 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 720 if (print) { 721 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 722 ierr = MatView(lG,NULL);CHKERRQ(ierr); 723 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 724 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 725 } 726 727 /* Get primal dofs (if any) */ 728 cum = 0; 729 for (i=0;i<ne;i++) { 730 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 731 } 732 if (fl2g) { 733 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 734 } 735 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 736 if (print) { 737 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 738 ierr = ISView(primals,NULL);CHKERRQ(ierr); 739 } 740 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 741 /* TODO: what if the user passed in some of them ? */ 742 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 743 ierr = ISDestroy(&primals);CHKERRQ(ierr); 744 745 /* Compute edge connectivity */ 746 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 747 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 748 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 749 if (fl2g) { 750 PetscBT btf; 751 PetscInt *iia,*jja,*iiu,*jju; 752 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 753 754 /* create CSR for all local dofs */ 755 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 756 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 757 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 758 iiu = pcbddc->mat_graph->xadj; 759 jju = pcbddc->mat_graph->adjncy; 760 } else if (pcbddc->use_local_adj) { 761 rest = PETSC_TRUE; 762 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 763 } else { 764 free = PETSC_TRUE; 765 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 766 iiu[0] = 0; 767 for (i=0;i<n;i++) { 768 iiu[i+1] = i+1; 769 jju[i] = -1; 770 } 771 } 772 773 /* import sizes of CSR */ 774 iia[0] = 0; 775 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 776 777 /* overwrite entries corresponding to the Nedelec field */ 778 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 779 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 780 for (i=0;i<ne;i++) { 781 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 782 iia[idxs[i]+1] = ii[i+1]-ii[i]; 783 } 784 785 /* iia in CSR */ 786 for (i=0;i<n;i++) iia[i+1] += iia[i]; 787 788 /* jja in CSR */ 789 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 790 for (i=0;i<n;i++) 791 if (!PetscBTLookup(btf,i)) 792 for (j=0;j<iiu[i+1]-iiu[i];j++) 793 jja[iia[i]+j] = jju[iiu[i]+j]; 794 795 /* map edge dofs connectivity */ 796 if (jj) { 797 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 798 for (i=0;i<ne;i++) { 799 PetscInt e = idxs[i]; 800 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 801 } 802 } 803 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 804 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 805 if (rest) { 806 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 807 } 808 if (free) { 809 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 810 } 811 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 812 } else { 813 if (jj) { 814 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 815 } 816 } 817 818 /* Analyze interface for edge dofs */ 819 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 820 821 /* Get coarse edges in the edge space */ 822 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 823 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 824 825 if (fl2g) { 826 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 827 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 828 for (i=0;i<nee;i++) { 829 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 830 } 831 } else { 832 eedges = alleedges; 833 primals = allprimals; 834 } 835 836 /* Mark fine edge dofs with their coarse edge id */ 837 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 838 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 839 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 840 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 841 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 842 if (print) { 843 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 844 ierr = ISView(primals,NULL);CHKERRQ(ierr); 845 } 846 847 maxsize = 0; 848 for (i=0;i<nee;i++) { 849 PetscInt size,mark = i+1; 850 851 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 852 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 853 for (j=0;j<size;j++) marks[idxs[j]] = mark; 854 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 855 maxsize = PetscMax(maxsize,size); 856 } 857 858 /* Find coarse edge endpoints */ 859 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 860 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 861 for (i=0;i<nee;i++) { 862 PetscInt mark = i+1,size; 863 864 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 865 if (!size && nedfieldlocal) continue; 866 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 867 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 868 if (print) { 869 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 870 ISView(eedges[i],NULL); 871 } 872 for (j=0;j<size;j++) { 873 PetscInt k, ee = idxs[j]; 874 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 875 for (k=ii[ee];k<ii[ee+1];k++) { 876 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 877 if (PetscBTLookup(btv,jj[k])) { 878 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 879 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 880 PetscInt k2; 881 PetscBool corner = PETSC_FALSE; 882 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 883 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 884 /* it's a corner if either is connected with an edge dof belonging to a different cc or 885 if the edge dof lie on the natural part of the boundary */ 886 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 887 corner = PETSC_TRUE; 888 break; 889 } 890 } 891 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 892 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 893 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 894 } else { 895 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 896 } 897 } 898 } 899 } 900 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 901 } 902 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 903 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 904 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 905 906 /* Reset marked primal dofs */ 907 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 908 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 909 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 910 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 911 912 /* Now use the initial lG */ 913 ierr = MatDestroy(&lG);CHKERRQ(ierr); 914 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 915 lG = lGinit; 916 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 917 918 /* Compute extended cols indices */ 919 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 920 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 921 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 922 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 923 i *= maxsize; 924 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 925 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 926 eerr = PETSC_FALSE; 927 for (i=0;i<nee;i++) { 928 PetscInt size,found = 0; 929 930 cum = 0; 931 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 932 if (!size && nedfieldlocal) continue; 933 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 934 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 935 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 936 for (j=0;j<size;j++) { 937 PetscInt k,ee = idxs[j]; 938 for (k=ii[ee];k<ii[ee+1];k++) { 939 PetscInt vv = jj[k]; 940 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 941 else if (!PetscBTLookupSet(btvc,vv)) found++; 942 } 943 } 944 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 945 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 946 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 947 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 948 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 949 /* it may happen that endpoints are not defined at this point 950 if it is the case, mark this edge for a second pass */ 951 if (cum != size -1 || found != 2) { 952 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 953 if (print) { 954 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 955 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 956 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 957 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 958 } 959 eerr = PETSC_TRUE; 960 } 961 } 962 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 963 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 964 if (done) { 965 PetscInt *newprimals; 966 967 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 968 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 969 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 970 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 971 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 972 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 973 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 974 for (i=0;i<nee;i++) { 975 PetscBool has_candidates = PETSC_FALSE; 976 if (PetscBTLookup(bter,i)) { 977 PetscInt size,mark = i+1; 978 979 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 980 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 981 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 982 for (j=0;j<size;j++) { 983 PetscInt k,ee = idxs[j]; 984 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 985 for (k=ii[ee];k<ii[ee+1];k++) { 986 /* set all candidates located on the edge as corners */ 987 if (PetscBTLookup(btvcand,jj[k])) { 988 PetscInt k2,vv = jj[k]; 989 has_candidates = PETSC_TRUE; 990 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 991 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 992 /* set all edge dofs connected to candidate as primals */ 993 for (k2=iit[vv];k2<iit[vv+1];k2++) { 994 if (marks[jjt[k2]] == mark) { 995 PetscInt k3,ee2 = jjt[k2]; 996 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 997 newprimals[cum++] = ee2; 998 /* finally set the new corners */ 999 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 1000 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1001 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1002 } 1003 } 1004 } 1005 } else { 1006 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1007 } 1008 } 1009 } 1010 if (!has_candidates) { /* circular edge */ 1011 PetscInt k, ee = idxs[0],*tmarks; 1012 1013 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1014 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1015 for (k=ii[ee];k<ii[ee+1];k++) { 1016 PetscInt k2; 1017 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1018 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1019 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1020 } 1021 for (j=0;j<size;j++) { 1022 if (tmarks[idxs[j]] > 1) { 1023 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1024 newprimals[cum++] = idxs[j]; 1025 } 1026 } 1027 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1028 } 1029 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1030 } 1031 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1032 } 1033 ierr = PetscFree(extcols);CHKERRQ(ierr); 1034 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1035 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1036 if (fl2g) { 1037 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1038 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1039 for (i=0;i<nee;i++) { 1040 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1041 } 1042 ierr = PetscFree(eedges);CHKERRQ(ierr); 1043 } 1044 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1045 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1046 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1047 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1048 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1049 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1050 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1051 if (fl2g) { 1052 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1053 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1054 for (i=0;i<nee;i++) { 1055 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1056 } 1057 } else { 1058 eedges = alleedges; 1059 primals = allprimals; 1060 } 1061 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1062 1063 /* Mark again */ 1064 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1065 for (i=0;i<nee;i++) { 1066 PetscInt size,mark = i+1; 1067 1068 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1069 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1070 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1071 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1072 } 1073 if (print) { 1074 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1075 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1076 } 1077 1078 /* Recompute extended cols */ 1079 eerr = PETSC_FALSE; 1080 for (i=0;i<nee;i++) { 1081 PetscInt size; 1082 1083 cum = 0; 1084 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1085 if (!size && nedfieldlocal) continue; 1086 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1087 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1088 for (j=0;j<size;j++) { 1089 PetscInt k,ee = idxs[j]; 1090 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1091 } 1092 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1093 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1094 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1095 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1096 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1097 if (cum != size -1) { 1098 if (print) { 1099 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1100 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1101 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1102 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1103 } 1104 eerr = PETSC_TRUE; 1105 } 1106 } 1107 } 1108 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1109 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1110 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1111 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1112 /* an error should not occur at this point */ 1113 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1114 1115 /* Check the number of endpoints */ 1116 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1117 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1118 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1119 for (i=0;i<nee;i++) { 1120 PetscInt size, found = 0, gc[2]; 1121 1122 /* init with defaults */ 1123 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1124 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1125 if (!size && nedfieldlocal) continue; 1126 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1127 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1128 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1129 for (j=0;j<size;j++) { 1130 PetscInt k,ee = idxs[j]; 1131 for (k=ii[ee];k<ii[ee+1];k++) { 1132 PetscInt vv = jj[k]; 1133 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1134 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1135 corners[i*2+found++] = vv; 1136 } 1137 } 1138 } 1139 if (found != 2) { 1140 PetscInt e; 1141 if (fl2g) { 1142 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1143 } else { 1144 e = idxs[0]; 1145 } 1146 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1147 } 1148 1149 /* get primal dof index on this coarse edge */ 1150 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1151 if (gc[0] > gc[1]) { 1152 PetscInt swap = corners[2*i]; 1153 corners[2*i] = corners[2*i+1]; 1154 corners[2*i+1] = swap; 1155 } 1156 cedges[i] = idxs[size-1]; 1157 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1158 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1159 } 1160 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1161 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1162 1163 #if defined(PETSC_USE_DEBUG) 1164 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1165 not interfere with neighbouring coarse edges */ 1166 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1167 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1168 for (i=0;i<nv;i++) { 1169 PetscInt emax = 0,eemax = 0; 1170 1171 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1172 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1173 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1174 for (j=1;j<nee+1;j++) { 1175 if (emax < emarks[j]) { 1176 emax = emarks[j]; 1177 eemax = j; 1178 } 1179 } 1180 /* not relevant for edges */ 1181 if (!eemax) continue; 1182 1183 for (j=ii[i];j<ii[i+1];j++) { 1184 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1185 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1186 } 1187 } 1188 } 1189 ierr = PetscFree(emarks);CHKERRQ(ierr); 1190 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1191 #endif 1192 1193 /* Compute extended rows indices for edge blocks of the change of basis */ 1194 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1195 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1196 extmem *= maxsize; 1197 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1198 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1199 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1200 for (i=0;i<nv;i++) { 1201 PetscInt mark = 0,size,start; 1202 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1203 for (j=ii[i];j<ii[i+1];j++) 1204 if (marks[jj[j]] && !mark) 1205 mark = marks[jj[j]]; 1206 1207 /* not relevant */ 1208 if (!mark) continue; 1209 1210 /* import extended row */ 1211 mark--; 1212 start = mark*extmem+extrowcum[mark]; 1213 size = ii[i+1]-ii[i]; 1214 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1215 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1216 extrowcum[mark] += size; 1217 } 1218 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1219 cum = 0; 1220 for (i=0;i<nee;i++) { 1221 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1222 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1223 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1224 cum = PetscMax(cum,size); 1225 } 1226 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1227 ierr = PetscFree(marks);CHKERRQ(ierr); 1228 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1229 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1230 1231 /* Workspace for lapack inner calls and VecSetValues */ 1232 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1233 1234 /* Create change of basis matrix (preallocation can be improved) */ 1235 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1236 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1237 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1238 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1239 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1240 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1241 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1242 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1243 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1244 ierr = MatSetOption(T,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 1245 1246 /* Defaults to identity */ 1247 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1248 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1249 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1250 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1251 1252 /* Create discrete gradient for the coarser level if needed */ 1253 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1254 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1255 if (pcbddc->current_level < pcbddc->max_levels) { 1256 ISLocalToGlobalMapping cel2g,cvl2g; 1257 IS wis,gwis; 1258 PetscInt cnv,cne; 1259 1260 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1261 if (fl2g) { 1262 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1263 } else { 1264 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1265 pcbddc->nedclocal = wis; 1266 } 1267 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1268 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1269 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1270 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1271 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1272 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1273 1274 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1275 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1276 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1277 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1278 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1279 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1280 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1281 1282 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1283 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1284 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1285 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1286 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1287 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1288 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1289 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1290 } 1291 1292 #if defined(PRINT_GDET) 1293 inc = 0; 1294 lev = pcbddc->current_level; 1295 #endif 1296 for (i=0;i<nee;i++) { 1297 Mat Gins = NULL, GKins = NULL; 1298 IS cornersis = NULL; 1299 PetscScalar cvals[2]; 1300 1301 if (pcbddc->nedcG) { 1302 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1303 } 1304 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1305 if (Gins && GKins) { 1306 PetscScalar *data; 1307 const PetscInt *rows,*cols; 1308 PetscInt nrh,nch,nrc,ncc; 1309 1310 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1311 /* H1 */ 1312 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1313 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1314 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1315 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1316 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1317 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1318 /* complement */ 1319 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1320 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1321 if (ncc + nch != nrc) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d",ncc,nch,nrc); 1322 if (ncc != 1 && pcbddc->nedcG) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the dicrete gradient for the next level with ncc %d",ncc); 1323 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1324 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1325 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1326 1327 /* coarse discrete gradient */ 1328 if (pcbddc->nedcG) { 1329 PetscInt cols[2]; 1330 1331 cols[0] = 2*i; 1332 cols[1] = 2*i+1; 1333 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1334 } 1335 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1336 } 1337 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1338 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1339 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1340 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1341 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1342 } 1343 1344 /* Start assembling */ 1345 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1346 if (pcbddc->nedcG) { 1347 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1348 } 1349 1350 /* Free */ 1351 if (fl2g) { 1352 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1353 for (i=0;i<nee;i++) { 1354 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1355 } 1356 ierr = PetscFree(eedges);CHKERRQ(ierr); 1357 } 1358 1359 /* hack mat_graph with primal dofs on the coarse edges */ 1360 { 1361 PCBDDCGraph graph = pcbddc->mat_graph; 1362 PetscInt *oqueue = graph->queue; 1363 PetscInt *ocptr = graph->cptr; 1364 PetscInt ncc,*idxs; 1365 1366 /* find first primal edge */ 1367 if (pcbddc->nedclocal) { 1368 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1369 } else { 1370 if (fl2g) { 1371 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1372 } 1373 idxs = cedges; 1374 } 1375 cum = 0; 1376 while (cum < nee && cedges[cum] < 0) cum++; 1377 1378 /* adapt connected components */ 1379 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1380 graph->cptr[0] = 0; 1381 for (i=0,ncc=0;i<graph->ncc;i++) { 1382 PetscInt lc = ocptr[i+1]-ocptr[i]; 1383 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1384 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1385 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1386 ncc++; 1387 lc--; 1388 cum++; 1389 while (cum < nee && cedges[cum] < 0) cum++; 1390 } 1391 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1392 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1393 ncc++; 1394 } 1395 graph->ncc = ncc; 1396 if (pcbddc->nedclocal) { 1397 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1398 } 1399 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1400 } 1401 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1402 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1403 1404 1405 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1406 ierr = PetscFree(extrow);CHKERRQ(ierr); 1407 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1408 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1409 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1410 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1411 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1412 ierr = PetscFree(corners);CHKERRQ(ierr); 1413 ierr = PetscFree(cedges);CHKERRQ(ierr); 1414 ierr = PetscFree(extrows);CHKERRQ(ierr); 1415 ierr = PetscFree(extcols);CHKERRQ(ierr); 1416 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1417 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1418 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1419 1420 /* Complete assembling */ 1421 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1422 if (pcbddc->nedcG) { 1423 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1424 #if 0 1425 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1426 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1427 #endif 1428 } 1429 1430 /* set change of basis */ 1431 ierr = PCBDDCSetChangeOfBasisMat(pc,T,PETSC_FALSE);CHKERRQ(ierr); 1432 #if 0 1433 if (pcbddc->current_level) { 1434 PetscViewer viewer; 1435 char filename[256]; 1436 Mat Tned; 1437 IS sub; 1438 PetscInt rst; 1439 1440 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 1441 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 1442 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 1443 if (nedfieldlocal) { 1444 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 1445 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 1446 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 1447 } else { 1448 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 1449 } 1450 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 1451 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 1452 ierr = MatGetOwnershipRange(pc->pmat,&rst,NULL);CHKERRQ(ierr); 1453 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 1454 if (matis->sf_rootdata[i]) { 1455 matis->sf_rootdata[cum++] = i + rst; 1456 } 1457 } 1458 PetscPrintf(PETSC_COMM_SELF,"[%D] LEVEL %d MY ne %d cum %d\n",PetscGlobalRank,pcbddc->current_level,ne,cum); 1459 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cum,matis->sf_rootdata,PETSC_USE_POINTER,&sub);CHKERRQ(ierr); 1460 ierr = MatGetSubMatrix(T,sub,sub,MAT_INITIAL_MATRIX,&Tned);CHKERRQ(ierr); 1461 ierr = ISDestroy(&sub);CHKERRQ(ierr); 1462 1463 sprintf(filename,"Change_l%d.m",pcbddc->current_level); 1464 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)Tned),filename,&viewer);CHKERRQ(ierr); 1465 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1466 ierr = PetscObjectSetName((PetscObject)Tned,"T");CHKERRQ(ierr); 1467 ierr = MatView(Tned,viewer);CHKERRQ(ierr); 1468 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1469 ierr = MatDestroy(&Tned);CHKERRQ(ierr); 1470 } 1471 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1472 #endif 1473 ierr = MatDestroy(&T);CHKERRQ(ierr); 1474 1475 PetscFunctionReturn(0); 1476 } 1477 1478 /* the near-null space of BDDC carries information on quadrature weights, 1479 and these can be collinear -> so cheat with MatNullSpaceCreate 1480 and create a suitable set of basis vectors first */ 1481 #undef __FUNCT__ 1482 #define __FUNCT__ "PCBDDCNullSpaceCreate" 1483 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1484 { 1485 PetscErrorCode ierr; 1486 PetscInt i; 1487 1488 PetscFunctionBegin; 1489 for (i=0;i<nvecs;i++) { 1490 PetscInt first,last; 1491 1492 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1493 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1494 if (i>=first && i < last) { 1495 PetscScalar *data; 1496 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1497 if (!has_const) { 1498 data[i-first] = 1.; 1499 } else { 1500 data[2*i-first] = 1./PetscSqrtReal(2.); 1501 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1502 } 1503 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1504 } 1505 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1506 } 1507 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1508 for (i=0;i<nvecs;i++) { /* reset vectors */ 1509 PetscInt first,last; 1510 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1511 if (i>=first && i < last) { 1512 PetscScalar *data; 1513 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1514 if (!has_const) { 1515 data[i-first] = 0.; 1516 } else { 1517 data[2*i-first] = 0.; 1518 data[2*i-first+1] = 0.; 1519 } 1520 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1521 } 1522 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1523 } 1524 PetscFunctionReturn(0); 1525 } 1526 1527 #undef __FUNCT__ 1528 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 1529 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1530 { 1531 Mat loc_divudotp; 1532 Vec p,v,vins,quad_vec,*quad_vecs; 1533 ISLocalToGlobalMapping map; 1534 IS *faces,*edges; 1535 PetscScalar *vals; 1536 const PetscScalar *array; 1537 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1538 PetscMPIInt rank; 1539 PetscErrorCode ierr; 1540 1541 PetscFunctionBegin; 1542 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1543 if (graph->twodim) { 1544 lmaxneighs = 2; 1545 } else { 1546 lmaxneighs = 1; 1547 for (i=0;i<ne;i++) { 1548 const PetscInt *idxs; 1549 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1550 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1551 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1552 } 1553 lmaxneighs++; /* graph count does not include self */ 1554 } 1555 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1556 maxsize = 0; 1557 for (i=0;i<ne;i++) { 1558 PetscInt nn; 1559 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1560 maxsize = PetscMax(maxsize,nn); 1561 } 1562 for (i=0;i<nf;i++) { 1563 PetscInt nn; 1564 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1565 maxsize = PetscMax(maxsize,nn); 1566 } 1567 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1568 /* create vectors to hold quadrature weights */ 1569 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1570 if (!transpose) { 1571 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1572 } else { 1573 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1574 } 1575 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1576 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1577 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1578 for (i=0;i<maxneighs;i++) { 1579 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1580 } 1581 1582 /* compute local quad vec */ 1583 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1584 if (!transpose) { 1585 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1586 } else { 1587 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1588 } 1589 ierr = VecSet(p,1.);CHKERRQ(ierr); 1590 if (!transpose) { 1591 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1592 } else { 1593 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1594 } 1595 if (vl2l) { 1596 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1597 } else { 1598 vins = v; 1599 } 1600 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1601 ierr = VecDestroy(&p);CHKERRQ(ierr); 1602 1603 /* insert in global quadrature vecs */ 1604 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1605 for (i=0;i<nf;i++) { 1606 const PetscInt *idxs; 1607 PetscInt idx,nn,j; 1608 1609 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1610 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1611 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1612 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1613 idx = -(idx+1); 1614 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1615 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1616 } 1617 for (i=0;i<ne;i++) { 1618 const PetscInt *idxs; 1619 PetscInt idx,nn,j; 1620 1621 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1622 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1623 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1624 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1625 idx = -(idx+1); 1626 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1627 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1628 } 1629 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1630 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1631 if (vl2l) { 1632 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1633 } 1634 ierr = VecDestroy(&v);CHKERRQ(ierr); 1635 ierr = PetscFree(vals);CHKERRQ(ierr); 1636 1637 /* assemble near null space */ 1638 for (i=0;i<maxneighs;i++) { 1639 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1640 } 1641 for (i=0;i<maxneighs;i++) { 1642 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1643 } 1644 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1645 PetscFunctionReturn(0); 1646 } 1647 1648 1649 #undef __FUNCT__ 1650 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 1651 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1652 { 1653 PetscErrorCode ierr; 1654 Vec local,global; 1655 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1656 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1657 1658 PetscFunctionBegin; 1659 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1660 /* need to convert from global to local topology information and remove references to information in global ordering */ 1661 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1662 if (pcbddc->user_provided_isfordofs) { 1663 if (pcbddc->n_ISForDofs) { 1664 PetscInt i; 1665 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1666 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1667 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1668 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1669 } 1670 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1671 pcbddc->n_ISForDofs = 0; 1672 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1673 } 1674 } else { 1675 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1676 PetscInt i, n = matis->A->rmap->n; 1677 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1678 if (i > 1) { 1679 pcbddc->n_ISForDofsLocal = i; 1680 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1681 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1682 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1683 } 1684 } 1685 } 1686 } 1687 1688 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1689 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1690 } 1691 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1692 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1693 } 1694 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1695 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1696 } 1697 ierr = VecDestroy(&global);CHKERRQ(ierr); 1698 ierr = VecDestroy(&local);CHKERRQ(ierr); 1699 PetscFunctionReturn(0); 1700 } 1701 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 1704 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1705 { 1706 PC_IS *pcis = (PC_IS*)(pc->data); 1707 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1708 PetscErrorCode ierr; 1709 1710 PetscFunctionBegin; 1711 if (!pcbddc->benign_have_null) { 1712 PetscFunctionReturn(0); 1713 } 1714 if (pcbddc->ChangeOfBasisMatrix) { 1715 Vec swap; 1716 1717 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1718 swap = pcbddc->work_change; 1719 pcbddc->work_change = r; 1720 r = swap; 1721 } 1722 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1723 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1724 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1725 ierr = VecSet(z,0.);CHKERRQ(ierr); 1726 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1727 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1728 if (pcbddc->ChangeOfBasisMatrix) { 1729 pcbddc->work_change = r; 1730 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1731 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1732 } 1733 PetscFunctionReturn(0); 1734 } 1735 1736 #undef __FUNCT__ 1737 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 1738 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1739 { 1740 PCBDDCBenignMatMult_ctx ctx; 1741 PetscErrorCode ierr; 1742 PetscBool apply_right,apply_left,reset_x; 1743 1744 PetscFunctionBegin; 1745 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1746 if (transpose) { 1747 apply_right = ctx->apply_left; 1748 apply_left = ctx->apply_right; 1749 } else { 1750 apply_right = ctx->apply_right; 1751 apply_left = ctx->apply_left; 1752 } 1753 reset_x = PETSC_FALSE; 1754 if (apply_right) { 1755 const PetscScalar *ax; 1756 PetscInt nl,i; 1757 1758 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1759 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1760 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1761 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1762 for (i=0;i<ctx->benign_n;i++) { 1763 PetscScalar sum,val; 1764 const PetscInt *idxs; 1765 PetscInt nz,j; 1766 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1767 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1768 sum = 0.; 1769 if (ctx->apply_p0) { 1770 val = ctx->work[idxs[nz-1]]; 1771 for (j=0;j<nz-1;j++) { 1772 sum += ctx->work[idxs[j]]; 1773 ctx->work[idxs[j]] += val; 1774 } 1775 } else { 1776 for (j=0;j<nz-1;j++) { 1777 sum += ctx->work[idxs[j]]; 1778 } 1779 } 1780 ctx->work[idxs[nz-1]] -= sum; 1781 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1782 } 1783 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1784 reset_x = PETSC_TRUE; 1785 } 1786 if (transpose) { 1787 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1788 } else { 1789 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1790 } 1791 if (reset_x) { 1792 ierr = VecResetArray(x);CHKERRQ(ierr); 1793 } 1794 if (apply_left) { 1795 PetscScalar *ay; 1796 PetscInt i; 1797 1798 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1799 for (i=0;i<ctx->benign_n;i++) { 1800 PetscScalar sum,val; 1801 const PetscInt *idxs; 1802 PetscInt nz,j; 1803 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1804 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1805 val = -ay[idxs[nz-1]]; 1806 if (ctx->apply_p0) { 1807 sum = 0.; 1808 for (j=0;j<nz-1;j++) { 1809 sum += ay[idxs[j]]; 1810 ay[idxs[j]] += val; 1811 } 1812 ay[idxs[nz-1]] += sum; 1813 } else { 1814 for (j=0;j<nz-1;j++) { 1815 ay[idxs[j]] += val; 1816 } 1817 ay[idxs[nz-1]] = 0.; 1818 } 1819 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1820 } 1821 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1822 } 1823 PetscFunctionReturn(0); 1824 } 1825 1826 #undef __FUNCT__ 1827 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 1828 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1829 { 1830 PetscErrorCode ierr; 1831 1832 PetscFunctionBegin; 1833 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1834 PetscFunctionReturn(0); 1835 } 1836 1837 #undef __FUNCT__ 1838 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 1839 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1840 { 1841 PetscErrorCode ierr; 1842 1843 PetscFunctionBegin; 1844 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1845 PetscFunctionReturn(0); 1846 } 1847 1848 #undef __FUNCT__ 1849 #define __FUNCT__ "PCBDDCBenignShellMat" 1850 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1851 { 1852 PC_IS *pcis = (PC_IS*)pc->data; 1853 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1854 PCBDDCBenignMatMult_ctx ctx; 1855 PetscErrorCode ierr; 1856 1857 PetscFunctionBegin; 1858 if (!restore) { 1859 Mat A_IB,A_BI; 1860 PetscScalar *work; 1861 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1862 1863 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1864 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1865 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1866 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1867 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1868 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1869 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1870 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1871 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1872 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1873 ctx->apply_left = PETSC_TRUE; 1874 ctx->apply_right = PETSC_FALSE; 1875 ctx->apply_p0 = PETSC_FALSE; 1876 ctx->benign_n = pcbddc->benign_n; 1877 if (reuse) { 1878 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1879 ctx->free = PETSC_FALSE; 1880 } else { /* TODO: could be optimized for successive solves */ 1881 ISLocalToGlobalMapping N_to_D; 1882 PetscInt i; 1883 1884 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1885 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1886 for (i=0;i<pcbddc->benign_n;i++) { 1887 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1888 } 1889 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1890 ctx->free = PETSC_TRUE; 1891 } 1892 ctx->A = pcis->A_IB; 1893 ctx->work = work; 1894 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1895 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1896 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1897 pcis->A_IB = A_IB; 1898 1899 /* A_BI as A_IB^T */ 1900 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1901 pcbddc->benign_original_mat = pcis->A_BI; 1902 pcis->A_BI = A_BI; 1903 } else { 1904 if (!pcbddc->benign_original_mat) { 1905 PetscFunctionReturn(0); 1906 } 1907 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1908 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1909 pcis->A_IB = ctx->A; 1910 ctx->A = NULL; 1911 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1912 pcis->A_BI = pcbddc->benign_original_mat; 1913 pcbddc->benign_original_mat = NULL; 1914 if (ctx->free) { 1915 PetscInt i; 1916 for (i=0;i<ctx->benign_n;i++) { 1917 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1918 } 1919 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1920 } 1921 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1922 ierr = PetscFree(ctx);CHKERRQ(ierr); 1923 } 1924 PetscFunctionReturn(0); 1925 } 1926 1927 /* used just in bddc debug mode */ 1928 #undef __FUNCT__ 1929 #define __FUNCT__ "PCBDDCBenignProject" 1930 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1931 { 1932 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1933 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1934 Mat An; 1935 PetscErrorCode ierr; 1936 1937 PetscFunctionBegin; 1938 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1939 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1940 if (is1) { 1941 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1942 ierr = MatDestroy(&An);CHKERRQ(ierr); 1943 } else { 1944 *B = An; 1945 } 1946 PetscFunctionReturn(0); 1947 } 1948 1949 /* TODO: add reuse flag */ 1950 #undef __FUNCT__ 1951 #define __FUNCT__ "MatSeqAIJCompress" 1952 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1953 { 1954 Mat Bt; 1955 PetscScalar *a,*bdata; 1956 const PetscInt *ii,*ij; 1957 PetscInt m,n,i,nnz,*bii,*bij; 1958 PetscBool flg_row; 1959 PetscErrorCode ierr; 1960 1961 PetscFunctionBegin; 1962 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 1963 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1964 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 1965 nnz = n; 1966 for (i=0;i<ii[n];i++) { 1967 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 1968 } 1969 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 1970 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 1971 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 1972 nnz = 0; 1973 bii[0] = 0; 1974 for (i=0;i<n;i++) { 1975 PetscInt j; 1976 for (j=ii[i];j<ii[i+1];j++) { 1977 PetscScalar entry = a[j]; 1978 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 1979 bij[nnz] = ij[j]; 1980 bdata[nnz] = entry; 1981 nnz++; 1982 } 1983 } 1984 bii[i+1] = nnz; 1985 } 1986 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 1987 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 1988 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1989 { 1990 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 1991 b->free_a = PETSC_TRUE; 1992 b->free_ij = PETSC_TRUE; 1993 } 1994 *B = Bt; 1995 PetscFunctionReturn(0); 1996 } 1997 1998 #undef __FUNCT__ 1999 #define __FUNCT__ "MatDetectDisconnectedComponents" 2000 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 2001 { 2002 Mat B; 2003 IS is_dummy,*cc_n; 2004 ISLocalToGlobalMapping l2gmap_dummy; 2005 PCBDDCGraph graph; 2006 PetscInt i,n; 2007 PetscInt *xadj,*adjncy; 2008 PetscInt *xadj_filtered,*adjncy_filtered; 2009 PetscBool flg_row,isseqaij; 2010 PetscErrorCode ierr; 2011 2012 PetscFunctionBegin; 2013 if (!A->rmap->N || !A->cmap->N) { 2014 *ncc = 0; 2015 *cc = NULL; 2016 PetscFunctionReturn(0); 2017 } 2018 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2019 if (!isseqaij && filter) { 2020 PetscBool isseqdense; 2021 2022 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2023 if (!isseqdense) { 2024 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2025 } else { /* TODO: rectangular case and LDA */ 2026 PetscScalar *array; 2027 PetscReal chop=1.e-6; 2028 2029 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2030 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2031 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2032 for (i=0;i<n;i++) { 2033 PetscInt j; 2034 for (j=i+1;j<n;j++) { 2035 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2036 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2037 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2038 } 2039 } 2040 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2041 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2042 } 2043 } else { 2044 B = A; 2045 } 2046 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2047 2048 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2049 if (filter) { 2050 PetscScalar *data; 2051 PetscInt j,cum; 2052 2053 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2054 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2055 cum = 0; 2056 for (i=0;i<n;i++) { 2057 PetscInt t; 2058 2059 for (j=xadj[i];j<xadj[i+1];j++) { 2060 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2061 continue; 2062 } 2063 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2064 } 2065 t = xadj_filtered[i]; 2066 xadj_filtered[i] = cum; 2067 cum += t; 2068 } 2069 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2070 } else { 2071 xadj_filtered = NULL; 2072 adjncy_filtered = NULL; 2073 } 2074 2075 /* compute local connected components using PCBDDCGraph */ 2076 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2077 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2078 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2079 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2080 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2081 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2082 if (xadj_filtered) { 2083 graph->xadj = xadj_filtered; 2084 graph->adjncy = adjncy_filtered; 2085 } else { 2086 graph->xadj = xadj; 2087 graph->adjncy = adjncy; 2088 } 2089 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2090 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2091 /* partial clean up */ 2092 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2093 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2094 if (A != B) { 2095 ierr = MatDestroy(&B);CHKERRQ(ierr); 2096 } 2097 2098 /* get back data */ 2099 if (ncc) *ncc = graph->ncc; 2100 if (cc) { 2101 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2102 for (i=0;i<graph->ncc;i++) { 2103 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2104 } 2105 *cc = cc_n; 2106 } 2107 /* clean up graph */ 2108 graph->xadj = 0; 2109 graph->adjncy = 0; 2110 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2111 PetscFunctionReturn(0); 2112 } 2113 2114 #undef __FUNCT__ 2115 #define __FUNCT__ "PCBDDCBenignCheck" 2116 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2117 { 2118 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2119 PC_IS* pcis = (PC_IS*)(pc->data); 2120 IS dirIS = NULL; 2121 PetscInt i; 2122 PetscErrorCode ierr; 2123 2124 PetscFunctionBegin; 2125 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2126 if (zerodiag) { 2127 Mat A; 2128 Vec vec3_N; 2129 PetscScalar *vals; 2130 const PetscInt *idxs; 2131 PetscInt nz,*count; 2132 2133 /* p0 */ 2134 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2135 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2136 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2137 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2138 for (i=0;i<nz;i++) vals[i] = 1.; 2139 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2140 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2141 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2142 /* v_I */ 2143 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2144 for (i=0;i<nz;i++) vals[i] = 0.; 2145 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2146 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2147 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2148 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2149 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2150 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2151 if (dirIS) { 2152 PetscInt n; 2153 2154 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2155 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2156 for (i=0;i<n;i++) vals[i] = 0.; 2157 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2158 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2159 } 2160 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2161 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2162 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2163 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2164 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2165 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2166 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2167 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2168 ierr = PetscFree(vals);CHKERRQ(ierr); 2169 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2170 2171 /* there should not be any pressure dofs lying on the interface */ 2172 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2173 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2174 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2175 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2176 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2177 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2178 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2179 ierr = PetscFree(count);CHKERRQ(ierr); 2180 } 2181 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2182 2183 /* check PCBDDCBenignGetOrSetP0 */ 2184 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2185 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2186 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2187 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2188 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2189 for (i=0;i<pcbddc->benign_n;i++) { 2190 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2191 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 2192 } 2193 PetscFunctionReturn(0); 2194 } 2195 2196 #undef __FUNCT__ 2197 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 2198 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2199 { 2200 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2201 IS pressures,zerodiag,*zerodiag_subs; 2202 PetscInt nz,n; 2203 PetscInt *interior_dofs,n_interior_dofs; 2204 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 2205 PetscErrorCode ierr; 2206 2207 PetscFunctionBegin; 2208 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2209 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2210 for (n=0;n<pcbddc->benign_n;n++) { 2211 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2212 } 2213 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2214 pcbddc->benign_n = 0; 2215 /* if a local info on dofs is present, assumes that the last field represents "pressures" 2216 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2217 Checks if all the pressure dofs in each subdomain have a zero diagonal 2218 If not, a change of basis on pressures is not needed 2219 since the local Schur complements are already SPD 2220 */ 2221 has_null_pressures = PETSC_TRUE; 2222 have_null = PETSC_TRUE; 2223 if (pcbddc->n_ISForDofsLocal) { 2224 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2225 2226 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2227 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2228 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2229 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2230 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2231 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2232 if (!sorted) { 2233 ierr = ISSort(pressures);CHKERRQ(ierr); 2234 } 2235 } else { 2236 pressures = NULL; 2237 } 2238 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2239 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2240 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2241 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2242 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2243 if (!sorted) { 2244 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2245 } 2246 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2247 if (!nz) { 2248 if (n) have_null = PETSC_FALSE; 2249 has_null_pressures = PETSC_FALSE; 2250 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2251 } 2252 recompute_zerodiag = PETSC_FALSE; 2253 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2254 zerodiag_subs = NULL; 2255 pcbddc->benign_n = 0; 2256 n_interior_dofs = 0; 2257 interior_dofs = NULL; 2258 if (pcbddc->current_level) { /* need to compute interior nodes */ 2259 PetscInt n,i,j; 2260 PetscInt n_neigh,*neigh,*n_shared,**shared; 2261 PetscInt *iwork; 2262 2263 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2264 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2265 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2266 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2267 for (i=1;i<n_neigh;i++) 2268 for (j=0;j<n_shared[i];j++) 2269 iwork[shared[i][j]] += 1; 2270 for (i=0;i<n;i++) 2271 if (!iwork[i]) 2272 interior_dofs[n_interior_dofs++] = i; 2273 ierr = PetscFree(iwork);CHKERRQ(ierr); 2274 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2275 } 2276 if (has_null_pressures) { 2277 IS *subs; 2278 PetscInt nsubs,i,j,nl; 2279 const PetscInt *idxs; 2280 PetscScalar *array; 2281 Vec *work; 2282 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2283 2284 subs = pcbddc->local_subs; 2285 nsubs = pcbddc->n_local_subs; 2286 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2287 if (pcbddc->current_level) { 2288 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2289 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2290 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2291 /* work[0] = 1_p */ 2292 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2293 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2294 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2295 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2296 /* work[0] = 1_v */ 2297 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2298 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2299 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2300 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2301 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2302 } 2303 if (nsubs > 1) { 2304 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2305 for (i=0;i<nsubs;i++) { 2306 ISLocalToGlobalMapping l2g; 2307 IS t_zerodiag_subs; 2308 PetscInt nl; 2309 2310 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2311 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2312 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2313 if (nl) { 2314 PetscBool valid = PETSC_TRUE; 2315 2316 if (pcbddc->current_level) { 2317 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2318 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2319 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2320 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2321 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2322 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2323 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2324 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2325 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2326 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2327 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2328 for (j=0;j<n_interior_dofs;j++) { 2329 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2330 valid = PETSC_FALSE; 2331 break; 2332 } 2333 } 2334 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2335 } 2336 if (valid && pcbddc->NeumannBoundariesLocal) { 2337 IS t_bc; 2338 PetscInt nzb; 2339 2340 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 2341 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 2342 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 2343 if (nzb) valid = PETSC_FALSE; 2344 } 2345 if (valid && pressures) { 2346 IS t_pressure_subs; 2347 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2348 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2349 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2350 } 2351 if (valid) { 2352 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2353 pcbddc->benign_n++; 2354 } else { 2355 recompute_zerodiag = PETSC_TRUE; 2356 } 2357 } 2358 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2359 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2360 } 2361 } else { /* there's just one subdomain (or zero if they have not been detected */ 2362 PetscBool valid = PETSC_TRUE; 2363 2364 if (pcbddc->NeumannBoundariesLocal) { 2365 PetscInt nzb; 2366 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 2367 if (nzb) valid = PETSC_FALSE; 2368 } 2369 if (valid && pressures) { 2370 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2371 } 2372 if (valid && pcbddc->current_level) { 2373 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2374 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2375 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2376 for (j=0;j<n_interior_dofs;j++) { 2377 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2378 valid = PETSC_FALSE; 2379 break; 2380 } 2381 } 2382 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2383 } 2384 if (valid) { 2385 pcbddc->benign_n = 1; 2386 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2387 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2388 zerodiag_subs[0] = zerodiag; 2389 } 2390 } 2391 if (pcbddc->current_level) { 2392 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2393 } 2394 } 2395 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2396 2397 if (!pcbddc->benign_n) { 2398 PetscInt n; 2399 2400 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2401 recompute_zerodiag = PETSC_FALSE; 2402 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2403 if (n) { 2404 has_null_pressures = PETSC_FALSE; 2405 have_null = PETSC_FALSE; 2406 } 2407 } 2408 2409 /* final check for null pressures */ 2410 if (zerodiag && pressures) { 2411 PetscInt nz,np; 2412 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2413 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2414 if (nz != np) have_null = PETSC_FALSE; 2415 } 2416 2417 if (recompute_zerodiag) { 2418 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2419 if (pcbddc->benign_n == 1) { 2420 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2421 zerodiag = zerodiag_subs[0]; 2422 } else { 2423 PetscInt i,nzn,*new_idxs; 2424 2425 nzn = 0; 2426 for (i=0;i<pcbddc->benign_n;i++) { 2427 PetscInt ns; 2428 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2429 nzn += ns; 2430 } 2431 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2432 nzn = 0; 2433 for (i=0;i<pcbddc->benign_n;i++) { 2434 PetscInt ns,*idxs; 2435 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2436 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2437 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2438 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2439 nzn += ns; 2440 } 2441 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2442 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2443 } 2444 have_null = PETSC_FALSE; 2445 } 2446 2447 /* Prepare matrix to compute no-net-flux */ 2448 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2449 Mat A,loc_divudotp; 2450 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2451 IS row,col,isused = NULL; 2452 PetscInt M,N,n,st,n_isused; 2453 2454 if (pressures) { 2455 isused = pressures; 2456 } else { 2457 isused = zerodiag; 2458 } 2459 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2460 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2461 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2462 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2463 n_isused = 0; 2464 if (isused) { 2465 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2466 } 2467 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2468 st = st-n_isused; 2469 if (n) { 2470 const PetscInt *gidxs; 2471 2472 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2473 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2474 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2475 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2476 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2477 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2478 } else { 2479 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2480 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2481 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2482 } 2483 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2484 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2485 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2486 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2487 ierr = ISDestroy(&row);CHKERRQ(ierr); 2488 ierr = ISDestroy(&col);CHKERRQ(ierr); 2489 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2490 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2491 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2492 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2493 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2494 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2495 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2496 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2497 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2498 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2499 } 2500 2501 /* change of basis and p0 dofs */ 2502 if (has_null_pressures) { 2503 IS zerodiagc; 2504 const PetscInt *idxs,*idxsc; 2505 PetscInt i,s,*nnz; 2506 2507 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2508 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2509 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2510 /* local change of basis for pressures */ 2511 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2512 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2513 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2514 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2515 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2516 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2517 for (i=0;i<pcbddc->benign_n;i++) { 2518 PetscInt nzs,j; 2519 2520 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2521 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2522 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2523 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2524 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2525 } 2526 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2527 ierr = PetscFree(nnz);CHKERRQ(ierr); 2528 /* set identity on velocities */ 2529 for (i=0;i<n-nz;i++) { 2530 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2531 } 2532 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2533 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2534 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2535 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2536 /* set change on pressures */ 2537 for (s=0;s<pcbddc->benign_n;s++) { 2538 PetscScalar *array; 2539 PetscInt nzs; 2540 2541 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2542 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2543 for (i=0;i<nzs-1;i++) { 2544 PetscScalar vals[2]; 2545 PetscInt cols[2]; 2546 2547 cols[0] = idxs[i]; 2548 cols[1] = idxs[nzs-1]; 2549 vals[0] = 1.; 2550 vals[1] = 1.; 2551 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2552 } 2553 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2554 for (i=0;i<nzs-1;i++) array[i] = -1.; 2555 array[nzs-1] = 1.; 2556 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2557 /* store local idxs for p0 */ 2558 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2559 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2560 ierr = PetscFree(array);CHKERRQ(ierr); 2561 } 2562 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2563 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2564 /* project if needed */ 2565 if (pcbddc->benign_change_explicit) { 2566 Mat M; 2567 2568 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2569 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2570 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2571 ierr = MatDestroy(&M);CHKERRQ(ierr); 2572 } 2573 /* store global idxs for p0 */ 2574 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2575 } 2576 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2577 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2578 2579 /* determines if the coarse solver will be singular or not */ 2580 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2581 /* determines if the problem has subdomains with 0 pressure block */ 2582 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2583 *zerodiaglocal = zerodiag; 2584 PetscFunctionReturn(0); 2585 } 2586 2587 #undef __FUNCT__ 2588 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 2589 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2590 { 2591 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2592 PetscScalar *array; 2593 PetscErrorCode ierr; 2594 2595 PetscFunctionBegin; 2596 if (!pcbddc->benign_sf) { 2597 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2598 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2599 } 2600 if (get) { 2601 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2602 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2603 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2604 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2605 } else { 2606 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2607 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2608 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2609 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2610 } 2611 PetscFunctionReturn(0); 2612 } 2613 2614 #undef __FUNCT__ 2615 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 2616 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2617 { 2618 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2619 PetscErrorCode ierr; 2620 2621 PetscFunctionBegin; 2622 /* TODO: add error checking 2623 - avoid nested pop (or push) calls. 2624 - cannot push before pop. 2625 - cannot call this if pcbddc->local_mat is NULL 2626 */ 2627 if (!pcbddc->benign_n) { 2628 PetscFunctionReturn(0); 2629 } 2630 if (pop) { 2631 if (pcbddc->benign_change_explicit) { 2632 IS is_p0; 2633 MatReuse reuse; 2634 2635 /* extract B_0 */ 2636 reuse = MAT_INITIAL_MATRIX; 2637 if (pcbddc->benign_B0) { 2638 reuse = MAT_REUSE_MATRIX; 2639 } 2640 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2641 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2642 /* remove rows and cols from local problem */ 2643 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2644 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2645 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2646 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2647 } else { 2648 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2649 PetscScalar *vals; 2650 PetscInt i,n,*idxs_ins; 2651 2652 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2653 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2654 if (!pcbddc->benign_B0) { 2655 PetscInt *nnz; 2656 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2657 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2658 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2659 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2660 for (i=0;i<pcbddc->benign_n;i++) { 2661 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2662 nnz[i] = n - nnz[i]; 2663 } 2664 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2665 ierr = PetscFree(nnz);CHKERRQ(ierr); 2666 } 2667 2668 for (i=0;i<pcbddc->benign_n;i++) { 2669 PetscScalar *array; 2670 PetscInt *idxs,j,nz,cum; 2671 2672 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2673 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2674 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2675 for (j=0;j<nz;j++) vals[j] = 1.; 2676 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2677 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2678 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2679 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2680 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2681 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2682 cum = 0; 2683 for (j=0;j<n;j++) { 2684 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2685 vals[cum] = array[j]; 2686 idxs_ins[cum] = j; 2687 cum++; 2688 } 2689 } 2690 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2691 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2692 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2693 } 2694 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2695 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2696 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2697 } 2698 } else { /* push */ 2699 if (pcbddc->benign_change_explicit) { 2700 PetscInt i; 2701 2702 for (i=0;i<pcbddc->benign_n;i++) { 2703 PetscScalar *B0_vals; 2704 PetscInt *B0_cols,B0_ncol; 2705 2706 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2707 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2708 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2709 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2710 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2711 } 2712 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2713 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2714 } else { 2715 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2716 } 2717 } 2718 PetscFunctionReturn(0); 2719 } 2720 2721 #undef __FUNCT__ 2722 #define __FUNCT__ "PCBDDCAdaptiveSelection" 2723 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2724 { 2725 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2726 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2727 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2728 PetscBLASInt *B_iwork,*B_ifail; 2729 PetscScalar *work,lwork; 2730 PetscScalar *St,*S,*eigv; 2731 PetscScalar *Sarray,*Starray; 2732 PetscReal *eigs,thresh; 2733 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2734 PetscBool allocated_S_St; 2735 #if defined(PETSC_USE_COMPLEX) 2736 PetscReal *rwork; 2737 #endif 2738 PetscErrorCode ierr; 2739 2740 PetscFunctionBegin; 2741 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2742 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2743 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2744 2745 if (pcbddc->dbg_flag) { 2746 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2747 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2748 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2749 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2750 } 2751 2752 if (pcbddc->dbg_flag) { 2753 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2754 } 2755 2756 /* max size of subsets */ 2757 mss = 0; 2758 for (i=0;i<sub_schurs->n_subs;i++) { 2759 PetscInt subset_size; 2760 2761 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2762 mss = PetscMax(mss,subset_size); 2763 } 2764 2765 /* min/max and threshold */ 2766 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2767 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2768 nmax = PetscMax(nmin,nmax); 2769 allocated_S_St = PETSC_FALSE; 2770 if (nmin) { 2771 allocated_S_St = PETSC_TRUE; 2772 } 2773 2774 /* allocate lapack workspace */ 2775 cum = cum2 = 0; 2776 maxneigs = 0; 2777 for (i=0;i<sub_schurs->n_subs;i++) { 2778 PetscInt n,subset_size; 2779 2780 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2781 n = PetscMin(subset_size,nmax); 2782 cum += subset_size; 2783 cum2 += subset_size*n; 2784 maxneigs = PetscMax(maxneigs,n); 2785 } 2786 if (mss) { 2787 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2788 PetscBLASInt B_itype = 1; 2789 PetscBLASInt B_N = mss; 2790 PetscReal zero = 0.0; 2791 PetscReal eps = 0.0; /* dlamch? */ 2792 2793 B_lwork = -1; 2794 S = NULL; 2795 St = NULL; 2796 eigs = NULL; 2797 eigv = NULL; 2798 B_iwork = NULL; 2799 B_ifail = NULL; 2800 #if defined(PETSC_USE_COMPLEX) 2801 rwork = NULL; 2802 #endif 2803 thresh = 1.0; 2804 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2805 #if defined(PETSC_USE_COMPLEX) 2806 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2807 #else 2808 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2809 #endif 2810 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2811 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2812 } else { 2813 /* TODO */ 2814 } 2815 } else { 2816 lwork = 0; 2817 } 2818 2819 nv = 0; 2820 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2821 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2822 } 2823 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2824 if (allocated_S_St) { 2825 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2826 } 2827 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2828 #if defined(PETSC_USE_COMPLEX) 2829 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2830 #endif 2831 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2832 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2833 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2834 nv+cum,&pcbddc->adaptive_constraints_idxs, 2835 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2836 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2837 2838 maxneigs = 0; 2839 cum = cumarray = 0; 2840 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2841 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2842 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2843 const PetscInt *idxs; 2844 2845 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2846 for (cum=0;cum<nv;cum++) { 2847 pcbddc->adaptive_constraints_n[cum] = 1; 2848 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2849 pcbddc->adaptive_constraints_data[cum] = 1.0; 2850 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2851 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2852 } 2853 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2854 } 2855 2856 if (mss) { /* multilevel */ 2857 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2858 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2859 } 2860 2861 thresh = pcbddc->adaptive_threshold; 2862 for (i=0;i<sub_schurs->n_subs;i++) { 2863 const PetscInt *idxs; 2864 PetscReal upper,lower; 2865 PetscInt j,subset_size,eigs_start = 0; 2866 PetscBLASInt B_N; 2867 PetscBool same_data = PETSC_FALSE; 2868 2869 if (pcbddc->use_deluxe_scaling) { 2870 upper = PETSC_MAX_REAL; 2871 lower = thresh; 2872 } else { 2873 upper = 1./thresh; 2874 lower = 0.; 2875 } 2876 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2877 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2878 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2879 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2880 if (sub_schurs->is_hermitian) { 2881 PetscInt j,k; 2882 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2883 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2884 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2885 } 2886 for (j=0;j<subset_size;j++) { 2887 for (k=j;k<subset_size;k++) { 2888 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2889 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2890 } 2891 } 2892 } else { 2893 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2894 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2895 } 2896 } else { 2897 S = Sarray + cumarray; 2898 St = Starray + cumarray; 2899 } 2900 /* see if we can save some work */ 2901 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2902 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2903 } 2904 2905 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2906 B_neigs = 0; 2907 } else { 2908 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2909 PetscBLASInt B_itype = 1; 2910 PetscBLASInt B_IL, B_IU; 2911 PetscReal eps = -1.0; /* dlamch? */ 2912 PetscInt nmin_s; 2913 PetscBool compute_range = PETSC_FALSE; 2914 2915 if (pcbddc->dbg_flag) { 2916 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]]); 2917 } 2918 2919 compute_range = PETSC_FALSE; 2920 if (thresh > 1.+PETSC_SMALL && !same_data) { 2921 compute_range = PETSC_TRUE; 2922 } 2923 2924 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2925 if (compute_range) { 2926 2927 /* ask for eigenvalues larger than thresh */ 2928 #if defined(PETSC_USE_COMPLEX) 2929 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2930 #else 2931 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2932 #endif 2933 } else if (!same_data) { 2934 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2935 B_IL = 1; 2936 #if defined(PETSC_USE_COMPLEX) 2937 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2938 #else 2939 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2940 #endif 2941 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 2942 PetscInt k; 2943 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2944 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2945 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2946 nmin = nmax; 2947 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2948 for (k=0;k<nmax;k++) { 2949 eigs[k] = 1./PETSC_SMALL; 2950 eigv[k*(subset_size+1)] = 1.0; 2951 } 2952 } 2953 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2954 if (B_ierr) { 2955 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2956 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2957 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2958 } 2959 2960 if (B_neigs > nmax) { 2961 if (pcbddc->dbg_flag) { 2962 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 2963 } 2964 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 2965 B_neigs = nmax; 2966 } 2967 2968 nmin_s = PetscMin(nmin,B_N); 2969 if (B_neigs < nmin_s) { 2970 PetscBLASInt B_neigs2; 2971 2972 if (pcbddc->use_deluxe_scaling) { 2973 B_IL = B_N - nmin_s + 1; 2974 B_IU = B_N - B_neigs; 2975 } else { 2976 B_IL = B_neigs + 1; 2977 B_IU = nmin_s; 2978 } 2979 if (pcbddc->dbg_flag) { 2980 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 2981 } 2982 if (sub_schurs->is_hermitian) { 2983 PetscInt j,k; 2984 for (j=0;j<subset_size;j++) { 2985 for (k=j;k<subset_size;k++) { 2986 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2987 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2988 } 2989 } 2990 } else { 2991 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2992 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2993 } 2994 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2995 #if defined(PETSC_USE_COMPLEX) 2996 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2997 #else 2998 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2999 #endif 3000 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3001 B_neigs += B_neigs2; 3002 } 3003 if (B_ierr) { 3004 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3005 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3006 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3007 } 3008 if (pcbddc->dbg_flag) { 3009 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3010 for (j=0;j<B_neigs;j++) { 3011 if (eigs[j] == 0.0) { 3012 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3013 } else { 3014 if (pcbddc->use_deluxe_scaling) { 3015 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3016 } else { 3017 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3018 } 3019 } 3020 } 3021 } 3022 } else { 3023 /* TODO */ 3024 } 3025 } 3026 /* change the basis back to the original one */ 3027 if (sub_schurs->change) { 3028 Mat change,phi,phit; 3029 3030 if (pcbddc->dbg_flag > 1) { 3031 PetscInt ii; 3032 for (ii=0;ii<B_neigs;ii++) { 3033 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3034 for (j=0;j<B_N;j++) { 3035 #if defined(PETSC_USE_COMPLEX) 3036 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3037 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3038 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3039 #else 3040 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3041 #endif 3042 } 3043 } 3044 } 3045 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3046 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3047 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3048 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3049 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3050 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3051 } 3052 maxneigs = PetscMax(B_neigs,maxneigs); 3053 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3054 if (B_neigs) { 3055 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3056 3057 if (pcbddc->dbg_flag > 1) { 3058 PetscInt ii; 3059 for (ii=0;ii<B_neigs;ii++) { 3060 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3061 for (j=0;j<B_N;j++) { 3062 #if defined(PETSC_USE_COMPLEX) 3063 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3064 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3065 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3066 #else 3067 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3068 #endif 3069 } 3070 } 3071 } 3072 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3073 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3074 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3075 cum++; 3076 } 3077 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3078 /* shift for next computation */ 3079 cumarray += subset_size*subset_size; 3080 } 3081 if (pcbddc->dbg_flag) { 3082 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3083 } 3084 3085 if (mss) { 3086 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3087 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3088 /* destroy matrices (junk) */ 3089 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3090 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3091 } 3092 if (allocated_S_St) { 3093 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3094 } 3095 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3096 #if defined(PETSC_USE_COMPLEX) 3097 ierr = PetscFree(rwork);CHKERRQ(ierr); 3098 #endif 3099 if (pcbddc->dbg_flag) { 3100 PetscInt maxneigs_r; 3101 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3102 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3103 } 3104 PetscFunctionReturn(0); 3105 } 3106 3107 #undef __FUNCT__ 3108 #define __FUNCT__ "PCBDDCSetUpSolvers" 3109 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3110 { 3111 PetscScalar *coarse_submat_vals; 3112 PetscErrorCode ierr; 3113 3114 PetscFunctionBegin; 3115 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3116 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3117 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3118 3119 /* Setup local neumann solver ksp_R */ 3120 /* PCBDDCSetUpLocalScatters should be called first! */ 3121 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3122 3123 /* 3124 Setup local correction and local part of coarse basis. 3125 Gives back the dense local part of the coarse matrix in column major ordering 3126 */ 3127 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3128 3129 /* Compute total number of coarse nodes and setup coarse solver */ 3130 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3131 3132 /* free */ 3133 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3134 PetscFunctionReturn(0); 3135 } 3136 3137 #undef __FUNCT__ 3138 #define __FUNCT__ "PCBDDCResetCustomization" 3139 PetscErrorCode PCBDDCResetCustomization(PC pc) 3140 { 3141 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3142 PetscErrorCode ierr; 3143 3144 PetscFunctionBegin; 3145 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3146 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3147 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3148 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3149 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3150 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3151 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3152 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3153 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3154 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3155 PetscFunctionReturn(0); 3156 } 3157 3158 #undef __FUNCT__ 3159 #define __FUNCT__ "PCBDDCResetTopography" 3160 PetscErrorCode PCBDDCResetTopography(PC pc) 3161 { 3162 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3163 PetscInt i; 3164 PetscErrorCode ierr; 3165 3166 PetscFunctionBegin; 3167 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3168 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3169 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3170 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3171 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3172 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3173 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3174 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3175 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3176 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3177 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3178 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3179 for (i=0;i<pcbddc->n_local_subs;i++) { 3180 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3181 } 3182 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3183 if (pcbddc->sub_schurs) { 3184 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 3185 } 3186 pcbddc->graphanalyzed = PETSC_FALSE; 3187 pcbddc->recompute_topography = PETSC_TRUE; 3188 PetscFunctionReturn(0); 3189 } 3190 3191 #undef __FUNCT__ 3192 #define __FUNCT__ "PCBDDCResetSolvers" 3193 PetscErrorCode PCBDDCResetSolvers(PC pc) 3194 { 3195 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3196 PetscErrorCode ierr; 3197 3198 PetscFunctionBegin; 3199 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3200 if (pcbddc->coarse_phi_B) { 3201 PetscScalar *array; 3202 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3203 ierr = PetscFree(array);CHKERRQ(ierr); 3204 } 3205 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3206 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3207 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3208 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3209 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3210 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3211 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3212 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3213 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3214 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3215 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3216 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3217 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3218 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3219 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 3220 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 3221 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 3222 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3223 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3224 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3225 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3226 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3227 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3228 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3229 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3230 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3231 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3232 if (pcbddc->benign_zerodiag_subs) { 3233 PetscInt i; 3234 for (i=0;i<pcbddc->benign_n;i++) { 3235 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3236 } 3237 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3238 } 3239 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3240 PetscFunctionReturn(0); 3241 } 3242 3243 #undef __FUNCT__ 3244 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 3245 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3246 { 3247 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3248 PC_IS *pcis = (PC_IS*)pc->data; 3249 VecType impVecType; 3250 PetscInt n_constraints,n_R,old_size; 3251 PetscErrorCode ierr; 3252 3253 PetscFunctionBegin; 3254 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3255 n_R = pcis->n - pcbddc->n_vertices; 3256 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3257 /* local work vectors (try to avoid unneeded work)*/ 3258 /* R nodes */ 3259 old_size = -1; 3260 if (pcbddc->vec1_R) { 3261 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3262 } 3263 if (n_R != old_size) { 3264 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3265 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3266 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3267 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3268 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3269 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3270 } 3271 /* local primal dofs */ 3272 old_size = -1; 3273 if (pcbddc->vec1_P) { 3274 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3275 } 3276 if (pcbddc->local_primal_size != old_size) { 3277 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3278 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3279 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3280 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3281 } 3282 /* local explicit constraints */ 3283 old_size = -1; 3284 if (pcbddc->vec1_C) { 3285 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3286 } 3287 if (n_constraints && n_constraints != old_size) { 3288 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3289 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3290 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3291 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3292 } 3293 PetscFunctionReturn(0); 3294 } 3295 3296 #undef __FUNCT__ 3297 #define __FUNCT__ "PCBDDCSetUpCorrection" 3298 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3299 { 3300 PetscErrorCode ierr; 3301 /* pointers to pcis and pcbddc */ 3302 PC_IS* pcis = (PC_IS*)pc->data; 3303 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3304 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3305 /* submatrices of local problem */ 3306 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3307 /* submatrices of local coarse problem */ 3308 Mat S_VV,S_CV,S_VC,S_CC; 3309 /* working matrices */ 3310 Mat C_CR; 3311 /* additional working stuff */ 3312 PC pc_R; 3313 Mat F; 3314 Vec dummy_vec; 3315 PetscBool isLU,isCHOL,isILU,need_benign_correction; 3316 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3317 PetscScalar *work; 3318 PetscInt *idx_V_B; 3319 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3320 PetscInt i,n_R,n_D,n_B; 3321 3322 /* some shortcuts to scalars */ 3323 PetscScalar one=1.0,m_one=-1.0; 3324 3325 PetscFunctionBegin; 3326 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3327 3328 /* Set Non-overlapping dimensions */ 3329 n_vertices = pcbddc->n_vertices; 3330 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3331 n_B = pcis->n_B; 3332 n_D = pcis->n - n_B; 3333 n_R = pcis->n - n_vertices; 3334 3335 /* vertices in boundary numbering */ 3336 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3337 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3338 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3339 3340 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3341 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3342 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3343 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3344 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3345 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3346 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3347 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3348 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3349 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3350 3351 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3352 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3353 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3354 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3355 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3356 lda_rhs = n_R; 3357 need_benign_correction = PETSC_FALSE; 3358 if (isLU || isILU || isCHOL) { 3359 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3360 } else if (sub_schurs && sub_schurs->reuse_solver) { 3361 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3362 MatFactorType type; 3363 3364 F = reuse_solver->F; 3365 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3366 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3367 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3368 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3369 } else { 3370 F = NULL; 3371 } 3372 3373 /* allocate workspace */ 3374 n = 0; 3375 if (n_constraints) { 3376 n += lda_rhs*n_constraints; 3377 } 3378 if (n_vertices) { 3379 n = PetscMax(2*lda_rhs*n_vertices,n); 3380 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3381 } 3382 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3383 3384 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3385 dummy_vec = NULL; 3386 if (need_benign_correction && lda_rhs != n_R && F) { 3387 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3388 } 3389 3390 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3391 if (n_constraints) { 3392 Mat M1,M2,M3,C_B; 3393 IS is_aux; 3394 PetscScalar *array,*array2; 3395 3396 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3397 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3398 3399 /* Extract constraints on R nodes: C_{CR} */ 3400 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3401 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3402 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3403 3404 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3405 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3406 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3407 for (i=0;i<n_constraints;i++) { 3408 const PetscScalar *row_cmat_values; 3409 const PetscInt *row_cmat_indices; 3410 PetscInt size_of_constraint,j; 3411 3412 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3413 for (j=0;j<size_of_constraint;j++) { 3414 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3415 } 3416 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3417 } 3418 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3419 if (F) { 3420 Mat B; 3421 3422 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3423 if (need_benign_correction) { 3424 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3425 3426 /* rhs is already zero on interior dofs, no need to change the rhs */ 3427 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3428 } 3429 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 3430 if (need_benign_correction) { 3431 PetscScalar *marr; 3432 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3433 3434 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3435 if (lda_rhs != n_R) { 3436 for (i=0;i<n_constraints;i++) { 3437 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3438 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3439 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3440 } 3441 } else { 3442 for (i=0;i<n_constraints;i++) { 3443 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3444 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3445 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3446 } 3447 } 3448 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3449 } 3450 ierr = MatDestroy(&B);CHKERRQ(ierr); 3451 } else { 3452 PetscScalar *marr; 3453 3454 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3455 for (i=0;i<n_constraints;i++) { 3456 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3457 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3458 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3459 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3460 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3461 } 3462 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3463 } 3464 if (!pcbddc->switch_static) { 3465 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3466 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3467 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3468 for (i=0;i<n_constraints;i++) { 3469 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3470 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3471 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3472 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3473 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3474 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3475 } 3476 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3477 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3478 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3479 } else { 3480 if (lda_rhs != n_R) { 3481 IS dummy; 3482 3483 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3484 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3485 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3486 } else { 3487 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3488 pcbddc->local_auxmat2 = local_auxmat2_R; 3489 } 3490 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3491 } 3492 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3493 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3494 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3495 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3496 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3497 if (isCHOL) { 3498 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3499 } else { 3500 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3501 } 3502 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3503 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3504 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3505 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3506 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3507 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3508 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3509 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3510 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3511 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3512 } 3513 3514 /* Get submatrices from subdomain matrix */ 3515 if (n_vertices) { 3516 IS is_aux; 3517 3518 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3519 IS tis; 3520 3521 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3522 ierr = ISSort(tis);CHKERRQ(ierr); 3523 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3524 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3525 } else { 3526 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3527 } 3528 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3529 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3530 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3531 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3532 } 3533 3534 /* Matrix of coarse basis functions (local) */ 3535 if (pcbddc->coarse_phi_B) { 3536 PetscInt on_B,on_primal,on_D=n_D; 3537 if (pcbddc->coarse_phi_D) { 3538 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3539 } 3540 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3541 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3542 PetscScalar *marray; 3543 3544 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3545 ierr = PetscFree(marray);CHKERRQ(ierr); 3546 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3547 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3548 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3549 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3550 } 3551 } 3552 3553 if (!pcbddc->coarse_phi_B) { 3554 PetscScalar *marray; 3555 3556 n = n_B*pcbddc->local_primal_size; 3557 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3558 n += n_D*pcbddc->local_primal_size; 3559 } 3560 if (!pcbddc->symmetric_primal) { 3561 n *= 2; 3562 } 3563 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 3564 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3565 n = n_B*pcbddc->local_primal_size; 3566 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3567 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3568 n += n_D*pcbddc->local_primal_size; 3569 } 3570 if (!pcbddc->symmetric_primal) { 3571 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3572 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3573 n = n_B*pcbddc->local_primal_size; 3574 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3575 } 3576 } else { 3577 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3578 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3579 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3580 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3581 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3582 } 3583 } 3584 } 3585 3586 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3587 p0_lidx_I = NULL; 3588 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3589 const PetscInt *idxs; 3590 3591 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3592 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3593 for (i=0;i<pcbddc->benign_n;i++) { 3594 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3595 } 3596 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3597 } 3598 3599 /* vertices */ 3600 if (n_vertices) { 3601 3602 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3603 3604 if (n_R) { 3605 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3606 PetscBLASInt B_N,B_one = 1; 3607 PetscScalar *x,*y; 3608 PetscBool isseqaij; 3609 3610 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3611 if (need_benign_correction) { 3612 ISLocalToGlobalMapping RtoN; 3613 IS is_p0; 3614 PetscInt *idxs_p0,n; 3615 3616 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3617 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3618 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3619 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3620 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3621 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3622 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3623 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3624 } 3625 3626 if (lda_rhs == n_R) { 3627 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3628 } else { 3629 PetscScalar *av,*array; 3630 const PetscInt *xadj,*adjncy; 3631 PetscInt n; 3632 PetscBool flg_row; 3633 3634 array = work+lda_rhs*n_vertices; 3635 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3636 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3637 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3638 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3639 for (i=0;i<n;i++) { 3640 PetscInt j; 3641 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3642 } 3643 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3644 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3645 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3646 } 3647 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3648 if (need_benign_correction) { 3649 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3650 PetscScalar *marr; 3651 3652 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3653 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3654 3655 | 0 0 0 | (V) 3656 L = | 0 0 -1 | (P-p0) 3657 | 0 0 -1 | (p0) 3658 3659 */ 3660 for (i=0;i<reuse_solver->benign_n;i++) { 3661 const PetscScalar *vals; 3662 const PetscInt *idxs,*idxs_zero; 3663 PetscInt n,j,nz; 3664 3665 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3666 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3667 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3668 for (j=0;j<n;j++) { 3669 PetscScalar val = vals[j]; 3670 PetscInt k,col = idxs[j]; 3671 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3672 } 3673 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3674 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3675 } 3676 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3677 } 3678 if (F) { 3679 /* need to correct the rhs */ 3680 if (need_benign_correction) { 3681 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3682 PetscScalar *marr; 3683 3684 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3685 if (lda_rhs != n_R) { 3686 for (i=0;i<n_vertices;i++) { 3687 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3688 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3689 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3690 } 3691 } else { 3692 for (i=0;i<n_vertices;i++) { 3693 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3694 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3695 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3696 } 3697 } 3698 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3699 } 3700 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3701 /* need to correct the solution */ 3702 if (need_benign_correction) { 3703 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3704 PetscScalar *marr; 3705 3706 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3707 if (lda_rhs != n_R) { 3708 for (i=0;i<n_vertices;i++) { 3709 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3710 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3711 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3712 } 3713 } else { 3714 for (i=0;i<n_vertices;i++) { 3715 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3716 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3717 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3718 } 3719 } 3720 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3721 } 3722 } else { 3723 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3724 for (i=0;i<n_vertices;i++) { 3725 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3726 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3727 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3728 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3729 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3730 } 3731 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3732 } 3733 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3734 /* S_VV and S_CV */ 3735 if (n_constraints) { 3736 Mat B; 3737 3738 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3739 for (i=0;i<n_vertices;i++) { 3740 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3741 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3742 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3743 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3744 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3745 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3746 } 3747 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3748 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3749 ierr = MatDestroy(&B);CHKERRQ(ierr); 3750 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3751 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3752 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3753 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3754 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3755 ierr = MatDestroy(&B);CHKERRQ(ierr); 3756 } 3757 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3758 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3759 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3760 } 3761 if (lda_rhs != n_R) { 3762 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3763 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3764 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3765 } 3766 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3767 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3768 if (need_benign_correction) { 3769 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3770 PetscScalar *marr,*sums; 3771 3772 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3773 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 3774 for (i=0;i<reuse_solver->benign_n;i++) { 3775 const PetscScalar *vals; 3776 const PetscInt *idxs,*idxs_zero; 3777 PetscInt n,j,nz; 3778 3779 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3780 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3781 for (j=0;j<n_vertices;j++) { 3782 PetscInt k; 3783 sums[j] = 0.; 3784 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3785 } 3786 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3787 for (j=0;j<n;j++) { 3788 PetscScalar val = vals[j]; 3789 PetscInt k; 3790 for (k=0;k<n_vertices;k++) { 3791 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3792 } 3793 } 3794 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3795 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3796 } 3797 ierr = PetscFree(sums);CHKERRQ(ierr); 3798 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 3799 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3800 } 3801 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3802 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3803 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3804 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3805 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3806 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3807 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3808 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3809 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3810 } else { 3811 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3812 } 3813 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3814 3815 /* coarse basis functions */ 3816 for (i=0;i<n_vertices;i++) { 3817 PetscScalar *y; 3818 3819 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3820 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3821 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3822 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3823 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3824 y[n_B*i+idx_V_B[i]] = 1.0; 3825 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3826 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3827 3828 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3829 PetscInt j; 3830 3831 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3832 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3833 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3834 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3835 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3836 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3837 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3838 } 3839 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3840 } 3841 /* if n_R == 0 the object is not destroyed */ 3842 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3843 } 3844 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3845 3846 if (n_constraints) { 3847 Mat B; 3848 3849 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3850 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3851 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3852 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3853 if (n_vertices) { 3854 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3855 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3856 } else { 3857 Mat S_VCt; 3858 3859 if (lda_rhs != n_R) { 3860 ierr = MatDestroy(&B);CHKERRQ(ierr); 3861 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3862 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3863 } 3864 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3865 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3866 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3867 } 3868 } 3869 ierr = MatDestroy(&B);CHKERRQ(ierr); 3870 /* coarse basis functions */ 3871 for (i=0;i<n_constraints;i++) { 3872 PetscScalar *y; 3873 3874 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3875 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3876 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3877 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3878 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3879 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3880 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3881 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3882 PetscInt j; 3883 3884 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3885 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3886 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3887 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3888 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3889 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3890 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3891 } 3892 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3893 } 3894 } 3895 if (n_constraints) { 3896 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3897 } 3898 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3899 3900 /* coarse matrix entries relative to B_0 */ 3901 if (pcbddc->benign_n) { 3902 Mat B0_B,B0_BPHI; 3903 IS is_dummy; 3904 PetscScalar *data; 3905 PetscInt j; 3906 3907 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3908 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3909 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3910 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3911 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3912 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3913 for (j=0;j<pcbddc->benign_n;j++) { 3914 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3915 for (i=0;i<pcbddc->local_primal_size;i++) { 3916 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3917 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3918 } 3919 } 3920 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3921 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3922 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3923 } 3924 3925 /* compute other basis functions for non-symmetric problems */ 3926 if (!pcbddc->symmetric_primal) { 3927 Mat B_V=NULL,B_C=NULL; 3928 PetscScalar *marray; 3929 3930 if (n_constraints) { 3931 Mat S_CCT,C_CRT; 3932 3933 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 3934 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 3935 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 3936 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 3937 if (n_vertices) { 3938 Mat S_VCT; 3939 3940 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 3941 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 3942 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 3943 } 3944 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 3945 } else { 3946 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 3947 } 3948 if (n_vertices && n_R) { 3949 PetscScalar *av,*marray; 3950 const PetscInt *xadj,*adjncy; 3951 PetscInt n; 3952 PetscBool flg_row; 3953 3954 /* B_V = B_V - A_VR^T */ 3955 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3956 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3957 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 3958 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3959 for (i=0;i<n;i++) { 3960 PetscInt j; 3961 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 3962 } 3963 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3964 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3965 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3966 } 3967 3968 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 3969 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3970 for (i=0;i<n_vertices;i++) { 3971 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 3972 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3973 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3974 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3975 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3976 } 3977 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3978 if (B_C) { 3979 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 3980 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 3981 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 3982 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3983 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3984 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3985 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3986 } 3987 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 3988 } 3989 /* coarse basis functions */ 3990 for (i=0;i<pcbddc->local_primal_size;i++) { 3991 PetscScalar *y; 3992 3993 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 3994 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3995 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3996 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3997 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3998 if (i<n_vertices) { 3999 y[n_B*i+idx_V_B[i]] = 1.0; 4000 } 4001 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4002 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4003 4004 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4005 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4006 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4007 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4008 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4009 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4010 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4011 } 4012 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4013 } 4014 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4015 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4016 } 4017 /* free memory */ 4018 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4019 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4020 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4021 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4022 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4023 ierr = PetscFree(work);CHKERRQ(ierr); 4024 if (n_vertices) { 4025 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4026 } 4027 if (n_constraints) { 4028 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4029 } 4030 /* Checking coarse_sub_mat and coarse basis functios */ 4031 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4032 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4033 if (pcbddc->dbg_flag) { 4034 Mat coarse_sub_mat; 4035 Mat AUXMAT,TM1,TM2,TM3,TM4; 4036 Mat coarse_phi_D,coarse_phi_B; 4037 Mat coarse_psi_D,coarse_psi_B; 4038 Mat A_II,A_BB,A_IB,A_BI; 4039 Mat C_B,CPHI; 4040 IS is_dummy; 4041 Vec mones; 4042 MatType checkmattype=MATSEQAIJ; 4043 PetscReal real_value; 4044 4045 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4046 Mat A; 4047 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4048 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4049 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4050 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4051 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4052 ierr = MatDestroy(&A);CHKERRQ(ierr); 4053 } else { 4054 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4055 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4056 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4057 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4058 } 4059 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4060 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4061 if (!pcbddc->symmetric_primal) { 4062 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4063 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4064 } 4065 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4066 4067 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4068 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4069 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4070 if (!pcbddc->symmetric_primal) { 4071 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4072 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4073 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4074 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4075 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4076 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4077 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4078 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4079 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4080 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4081 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4082 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4083 } else { 4084 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4085 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4086 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4087 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4088 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4089 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4090 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4091 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4092 } 4093 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4094 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4095 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4096 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4097 if (pcbddc->benign_n) { 4098 Mat B0_B,B0_BPHI; 4099 PetscScalar *data,*data2; 4100 PetscInt j; 4101 4102 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4103 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4104 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4105 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4106 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4107 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4108 for (j=0;j<pcbddc->benign_n;j++) { 4109 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4110 for (i=0;i<pcbddc->local_primal_size;i++) { 4111 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4112 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4113 } 4114 } 4115 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4116 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4117 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4118 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4119 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4120 } 4121 #if 0 4122 { 4123 PetscViewer viewer; 4124 char filename[256]; 4125 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4126 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4127 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4128 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4129 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4130 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4131 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4132 if (save_change) { 4133 Mat phi_B; 4134 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 4135 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 4136 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 4137 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 4138 } else { 4139 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4140 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4141 } 4142 if (pcbddc->coarse_phi_D) { 4143 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4144 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4145 } 4146 if (pcbddc->coarse_psi_B) { 4147 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4148 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4149 } 4150 if (pcbddc->coarse_psi_D) { 4151 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4152 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4153 } 4154 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4155 } 4156 #endif 4157 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4158 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4159 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4160 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4161 4162 /* check constraints */ 4163 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4164 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4165 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4166 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4167 } else { 4168 PetscScalar *data; 4169 Mat tmat; 4170 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4171 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4172 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4173 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4174 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4175 } 4176 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4177 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4178 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4179 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4180 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4181 if (!pcbddc->symmetric_primal) { 4182 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4183 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4184 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4185 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4186 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4187 } 4188 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4189 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4190 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4191 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4192 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4193 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4194 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4195 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4196 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4197 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4198 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4199 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4200 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4201 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4202 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4203 if (!pcbddc->symmetric_primal) { 4204 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4205 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4206 } 4207 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4208 } 4209 /* get back data */ 4210 *coarse_submat_vals_n = coarse_submat_vals; 4211 PetscFunctionReturn(0); 4212 } 4213 4214 #undef __FUNCT__ 4215 #define __FUNCT__ "MatGetSubMatrixUnsorted" 4216 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4217 { 4218 Mat *work_mat; 4219 IS isrow_s,iscol_s; 4220 PetscBool rsorted,csorted; 4221 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4222 PetscErrorCode ierr; 4223 4224 PetscFunctionBegin; 4225 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4226 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4227 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4228 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4229 4230 if (!rsorted) { 4231 const PetscInt *idxs; 4232 PetscInt *idxs_sorted,i; 4233 4234 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4235 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4236 for (i=0;i<rsize;i++) { 4237 idxs_perm_r[i] = i; 4238 } 4239 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4240 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4241 for (i=0;i<rsize;i++) { 4242 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4243 } 4244 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4245 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4246 } else { 4247 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4248 isrow_s = isrow; 4249 } 4250 4251 if (!csorted) { 4252 if (isrow == iscol) { 4253 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4254 iscol_s = isrow_s; 4255 } else { 4256 const PetscInt *idxs; 4257 PetscInt *idxs_sorted,i; 4258 4259 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4260 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4261 for (i=0;i<csize;i++) { 4262 idxs_perm_c[i] = i; 4263 } 4264 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4265 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4266 for (i=0;i<csize;i++) { 4267 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4268 } 4269 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4270 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4271 } 4272 } else { 4273 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4274 iscol_s = iscol; 4275 } 4276 4277 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4278 4279 if (!rsorted || !csorted) { 4280 Mat new_mat; 4281 IS is_perm_r,is_perm_c; 4282 4283 if (!rsorted) { 4284 PetscInt *idxs_r,i; 4285 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4286 for (i=0;i<rsize;i++) { 4287 idxs_r[idxs_perm_r[i]] = i; 4288 } 4289 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4290 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4291 } else { 4292 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4293 } 4294 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4295 4296 if (!csorted) { 4297 if (isrow_s == iscol_s) { 4298 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4299 is_perm_c = is_perm_r; 4300 } else { 4301 PetscInt *idxs_c,i; 4302 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4303 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4304 for (i=0;i<csize;i++) { 4305 idxs_c[idxs_perm_c[i]] = i; 4306 } 4307 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4308 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4309 } 4310 } else { 4311 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4312 } 4313 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4314 4315 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4316 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4317 work_mat[0] = new_mat; 4318 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4319 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4320 } 4321 4322 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4323 *B = work_mat[0]; 4324 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4325 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4326 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4327 PetscFunctionReturn(0); 4328 } 4329 4330 #undef __FUNCT__ 4331 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 4332 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4333 { 4334 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4335 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4336 Mat new_mat; 4337 IS is_local,is_global; 4338 PetscInt local_size; 4339 PetscBool isseqaij; 4340 PetscErrorCode ierr; 4341 4342 PetscFunctionBegin; 4343 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4344 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4345 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4346 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4347 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4348 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4349 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4350 4351 /* check */ 4352 if (pcbddc->dbg_flag) { 4353 Vec x,x_change; 4354 PetscReal error; 4355 4356 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4357 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4358 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4359 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4360 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4361 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4362 if (!pcbddc->change_interior) { 4363 const PetscScalar *x,*y,*v; 4364 PetscReal lerror = 0.; 4365 PetscInt i; 4366 4367 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4368 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4369 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4370 for (i=0;i<local_size;i++) 4371 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4372 lerror = PetscAbsScalar(x[i]-y[i]); 4373 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4374 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4375 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4376 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4377 if (error > PETSC_SMALL) { 4378 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4379 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4380 } else { 4381 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4382 } 4383 } 4384 } 4385 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4386 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4387 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4388 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4389 if (error > PETSC_SMALL) { 4390 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4391 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4392 } else { 4393 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4394 } 4395 } 4396 ierr = VecDestroy(&x);CHKERRQ(ierr); 4397 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4398 } 4399 4400 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4401 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4402 if (isseqaij) { 4403 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4404 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4405 } else { 4406 Mat work_mat; 4407 4408 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4409 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4410 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4411 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4412 } 4413 if (matis->A->symmetric_set) { 4414 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4415 #if !defined(PETSC_USE_COMPLEX) 4416 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4417 #endif 4418 } 4419 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4420 PetscFunctionReturn(0); 4421 } 4422 4423 #undef __FUNCT__ 4424 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 4425 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4426 { 4427 PC_IS* pcis = (PC_IS*)(pc->data); 4428 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4429 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4430 PetscInt *idx_R_local=NULL; 4431 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4432 PetscInt vbs,bs; 4433 PetscBT bitmask=NULL; 4434 PetscErrorCode ierr; 4435 4436 PetscFunctionBegin; 4437 /* 4438 No need to setup local scatters if 4439 - primal space is unchanged 4440 AND 4441 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4442 AND 4443 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4444 */ 4445 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4446 PetscFunctionReturn(0); 4447 } 4448 /* destroy old objects */ 4449 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4450 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4451 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4452 /* Set Non-overlapping dimensions */ 4453 n_B = pcis->n_B; 4454 n_D = pcis->n - n_B; 4455 n_vertices = pcbddc->n_vertices; 4456 4457 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4458 4459 /* create auxiliary bitmask and allocate workspace */ 4460 if (!sub_schurs || !sub_schurs->reuse_solver) { 4461 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4462 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4463 for (i=0;i<n_vertices;i++) { 4464 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4465 } 4466 4467 for (i=0, n_R=0; i<pcis->n; i++) { 4468 if (!PetscBTLookup(bitmask,i)) { 4469 idx_R_local[n_R++] = i; 4470 } 4471 } 4472 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4473 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4474 4475 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4476 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4477 } 4478 4479 /* Block code */ 4480 vbs = 1; 4481 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4482 if (bs>1 && !(n_vertices%bs)) { 4483 PetscBool is_blocked = PETSC_TRUE; 4484 PetscInt *vary; 4485 if (!sub_schurs || !sub_schurs->reuse_solver) { 4486 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4487 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4488 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4489 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4490 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4491 for (i=0; i<pcis->n/bs; i++) { 4492 if (vary[i]!=0 && vary[i]!=bs) { 4493 is_blocked = PETSC_FALSE; 4494 break; 4495 } 4496 } 4497 ierr = PetscFree(vary);CHKERRQ(ierr); 4498 } else { 4499 /* Verify directly the R set */ 4500 for (i=0; i<n_R/bs; i++) { 4501 PetscInt j,node=idx_R_local[bs*i]; 4502 for (j=1; j<bs; j++) { 4503 if (node != idx_R_local[bs*i+j]-j) { 4504 is_blocked = PETSC_FALSE; 4505 break; 4506 } 4507 } 4508 } 4509 } 4510 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4511 vbs = bs; 4512 for (i=0;i<n_R/vbs;i++) { 4513 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4514 } 4515 } 4516 } 4517 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4518 if (sub_schurs && sub_schurs->reuse_solver) { 4519 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4520 4521 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4522 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4523 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4524 reuse_solver->is_R = pcbddc->is_R_local; 4525 } else { 4526 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4527 } 4528 4529 /* print some info if requested */ 4530 if (pcbddc->dbg_flag) { 4531 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4532 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4533 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4534 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4535 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4536 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4537 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4538 } 4539 4540 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4541 if (!sub_schurs || !sub_schurs->reuse_solver) { 4542 IS is_aux1,is_aux2; 4543 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4544 4545 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4546 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4547 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4548 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4549 for (i=0; i<n_D; i++) { 4550 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4551 } 4552 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4553 for (i=0, j=0; i<n_R; i++) { 4554 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4555 aux_array1[j++] = i; 4556 } 4557 } 4558 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4559 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4560 for (i=0, j=0; i<n_B; i++) { 4561 if (!PetscBTLookup(bitmask,is_indices[i])) { 4562 aux_array2[j++] = i; 4563 } 4564 } 4565 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4566 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4567 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4568 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4569 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4570 4571 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4572 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4573 for (i=0, j=0; i<n_R; i++) { 4574 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4575 aux_array1[j++] = i; 4576 } 4577 } 4578 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4579 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4580 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4581 } 4582 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4583 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4584 } else { 4585 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4586 IS tis; 4587 PetscInt schur_size; 4588 4589 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4590 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4591 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4592 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4593 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4594 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4595 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4596 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4597 } 4598 } 4599 PetscFunctionReturn(0); 4600 } 4601 4602 4603 #undef __FUNCT__ 4604 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 4605 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4606 { 4607 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4608 PC_IS *pcis = (PC_IS*)pc->data; 4609 PC pc_temp; 4610 Mat A_RR; 4611 MatReuse reuse; 4612 PetscScalar m_one = -1.0; 4613 PetscReal value; 4614 PetscInt n_D,n_R; 4615 PetscBool check_corr[2],issbaij; 4616 PetscErrorCode ierr; 4617 /* prefixes stuff */ 4618 char dir_prefix[256],neu_prefix[256],str_level[16]; 4619 size_t len; 4620 4621 PetscFunctionBegin; 4622 4623 /* compute prefixes */ 4624 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4625 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4626 if (!pcbddc->current_level) { 4627 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4628 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4629 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4630 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4631 } else { 4632 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4633 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4634 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4635 len -= 15; /* remove "pc_bddc_coarse_" */ 4636 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4637 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4638 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4639 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4640 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4641 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4642 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4643 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4644 } 4645 4646 /* DIRICHLET PROBLEM */ 4647 if (dirichlet) { 4648 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4649 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4650 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4651 if (pcbddc->dbg_flag) { 4652 Mat A_IIn; 4653 4654 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4655 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4656 pcis->A_II = A_IIn; 4657 } 4658 } 4659 if (pcbddc->local_mat->symmetric_set) { 4660 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4661 } 4662 /* Matrix for Dirichlet problem is pcis->A_II */ 4663 n_D = pcis->n - pcis->n_B; 4664 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4665 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4666 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4667 /* default */ 4668 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4669 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4670 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4671 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4672 if (issbaij) { 4673 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4674 } else { 4675 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4676 } 4677 /* Allow user's customization */ 4678 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4679 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4680 } 4681 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4682 if (sub_schurs && sub_schurs->reuse_solver) { 4683 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4684 4685 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4686 } 4687 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4688 if (!n_D) { 4689 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4690 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4691 } 4692 /* Set Up KSP for Dirichlet problem of BDDC */ 4693 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4694 /* set ksp_D into pcis data */ 4695 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4696 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4697 pcis->ksp_D = pcbddc->ksp_D; 4698 } 4699 4700 /* NEUMANN PROBLEM */ 4701 A_RR = 0; 4702 if (neumann) { 4703 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4704 PetscInt ibs,mbs; 4705 PetscBool issbaij; 4706 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4707 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4708 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4709 if (pcbddc->ksp_R) { /* already created ksp */ 4710 PetscInt nn_R; 4711 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4712 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4713 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4714 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4715 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4716 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4717 reuse = MAT_INITIAL_MATRIX; 4718 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4719 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4720 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4721 reuse = MAT_INITIAL_MATRIX; 4722 } else { /* safe to reuse the matrix */ 4723 reuse = MAT_REUSE_MATRIX; 4724 } 4725 } 4726 /* last check */ 4727 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4728 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4729 reuse = MAT_INITIAL_MATRIX; 4730 } 4731 } else { /* first time, so we need to create the matrix */ 4732 reuse = MAT_INITIAL_MATRIX; 4733 } 4734 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4735 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4736 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4737 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4738 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4739 if (matis->A == pcbddc->local_mat) { 4740 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4741 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4742 } else { 4743 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4744 } 4745 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4746 if (matis->A == pcbddc->local_mat) { 4747 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4748 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4749 } else { 4750 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4751 } 4752 } 4753 /* extract A_RR */ 4754 if (sub_schurs && sub_schurs->reuse_solver) { 4755 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4756 4757 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4758 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4759 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4760 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4761 } else { 4762 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4763 } 4764 } else { 4765 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4766 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4767 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4768 } 4769 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4770 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4771 } 4772 if (pcbddc->local_mat->symmetric_set) { 4773 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4774 } 4775 if (!pcbddc->ksp_R) { /* create object if not present */ 4776 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4777 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4778 /* default */ 4779 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4780 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4781 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4782 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4783 if (issbaij) { 4784 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4785 } else { 4786 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4787 } 4788 /* Allow user's customization */ 4789 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4790 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4791 } 4792 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4793 if (!n_R) { 4794 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4795 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4796 } 4797 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4798 /* Reuse solver if it is present */ 4799 if (sub_schurs && sub_schurs->reuse_solver) { 4800 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4801 4802 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4803 } 4804 /* Set Up KSP for Neumann problem of BDDC */ 4805 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4806 } 4807 4808 if (pcbddc->dbg_flag) { 4809 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4810 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4811 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4812 } 4813 4814 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4815 check_corr[0] = check_corr[1] = PETSC_FALSE; 4816 if (pcbddc->NullSpace_corr[0]) { 4817 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4818 } 4819 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4820 check_corr[0] = PETSC_TRUE; 4821 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4822 } 4823 if (neumann && pcbddc->NullSpace_corr[2]) { 4824 check_corr[1] = PETSC_TRUE; 4825 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4826 } 4827 4828 /* check Dirichlet and Neumann solvers */ 4829 if (pcbddc->dbg_flag) { 4830 if (dirichlet) { /* Dirichlet */ 4831 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4832 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4833 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4834 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4835 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4836 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4837 if (check_corr[0]) { 4838 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4839 } 4840 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4841 } 4842 if (neumann) { /* Neumann */ 4843 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4844 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4845 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4846 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4847 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4848 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4849 if (check_corr[1]) { 4850 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4851 } 4852 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4853 } 4854 } 4855 /* free Neumann problem's matrix */ 4856 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4857 PetscFunctionReturn(0); 4858 } 4859 4860 #undef __FUNCT__ 4861 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 4862 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4863 { 4864 PetscErrorCode ierr; 4865 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4866 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4867 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4868 4869 PetscFunctionBegin; 4870 if (!reuse_solver) { 4871 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4872 } 4873 if (!pcbddc->switch_static) { 4874 if (applytranspose && pcbddc->local_auxmat1) { 4875 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4876 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4877 } 4878 if (!reuse_solver) { 4879 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4880 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4881 } else { 4882 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4883 4884 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4885 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4886 } 4887 } else { 4888 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4889 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4890 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4891 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4892 if (applytranspose && pcbddc->local_auxmat1) { 4893 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4894 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4895 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4896 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4897 } 4898 } 4899 if (!reuse_solver || pcbddc->switch_static) { 4900 if (applytranspose) { 4901 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4902 } else { 4903 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4904 } 4905 } else { 4906 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4907 4908 if (applytranspose) { 4909 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4910 } else { 4911 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4912 } 4913 } 4914 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4915 if (!pcbddc->switch_static) { 4916 if (!reuse_solver) { 4917 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4918 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4919 } else { 4920 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4921 4922 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4923 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4924 } 4925 if (!applytranspose && pcbddc->local_auxmat1) { 4926 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4927 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4928 } 4929 } else { 4930 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4931 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4932 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4933 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4934 if (!applytranspose && pcbddc->local_auxmat1) { 4935 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4936 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4937 } 4938 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4939 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4940 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4941 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4942 } 4943 PetscFunctionReturn(0); 4944 } 4945 4946 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 4947 #undef __FUNCT__ 4948 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 4949 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 4950 { 4951 PetscErrorCode ierr; 4952 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4953 PC_IS* pcis = (PC_IS*) (pc->data); 4954 const PetscScalar zero = 0.0; 4955 4956 PetscFunctionBegin; 4957 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 4958 if (!pcbddc->benign_apply_coarse_only) { 4959 if (applytranspose) { 4960 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4961 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4962 } else { 4963 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4964 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4965 } 4966 } else { 4967 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 4968 } 4969 4970 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 4971 if (pcbddc->benign_n) { 4972 PetscScalar *array; 4973 PetscInt j; 4974 4975 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4976 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 4977 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4978 } 4979 4980 /* start communications from local primal nodes to rhs of coarse solver */ 4981 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 4982 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4983 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4984 4985 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 4986 if (pcbddc->coarse_ksp) { 4987 Mat coarse_mat; 4988 Vec rhs,sol; 4989 MatNullSpace nullsp; 4990 PetscBool isbddc = PETSC_FALSE; 4991 4992 if (pcbddc->benign_have_null) { 4993 PC coarse_pc; 4994 4995 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4996 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4997 /* we need to propagate to coarser levels the need for a possible benign correction */ 4998 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 4999 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5000 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5001 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5002 } 5003 } 5004 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5005 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5006 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5007 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5008 if (nullsp) { 5009 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5010 } 5011 if (applytranspose) { 5012 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5013 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5014 } else { 5015 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5016 PC coarse_pc; 5017 5018 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5019 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5020 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5021 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5022 } else { 5023 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5024 } 5025 } 5026 /* we don't need the benign correction at coarser levels anymore */ 5027 if (pcbddc->benign_have_null && isbddc) { 5028 PC coarse_pc; 5029 PC_BDDC* coarsepcbddc; 5030 5031 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5032 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5033 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5034 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5035 } 5036 if (nullsp) { 5037 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5038 } 5039 } 5040 5041 /* Local solution on R nodes */ 5042 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5043 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5044 } 5045 /* communications from coarse sol to local primal nodes */ 5046 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5047 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5048 5049 /* Sum contributions from the two levels */ 5050 if (!pcbddc->benign_apply_coarse_only) { 5051 if (applytranspose) { 5052 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5053 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5054 } else { 5055 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5056 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5057 } 5058 /* store p0 */ 5059 if (pcbddc->benign_n) { 5060 PetscScalar *array; 5061 PetscInt j; 5062 5063 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5064 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5065 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5066 } 5067 } else { /* expand the coarse solution */ 5068 if (applytranspose) { 5069 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5070 } else { 5071 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5072 } 5073 } 5074 PetscFunctionReturn(0); 5075 } 5076 5077 #undef __FUNCT__ 5078 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 5079 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5080 { 5081 PetscErrorCode ierr; 5082 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5083 PetscScalar *array; 5084 Vec from,to; 5085 5086 PetscFunctionBegin; 5087 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5088 from = pcbddc->coarse_vec; 5089 to = pcbddc->vec1_P; 5090 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5091 Vec tvec; 5092 5093 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5094 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5095 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5096 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5097 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5098 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5099 } 5100 } else { /* from local to global -> put data in coarse right hand side */ 5101 from = pcbddc->vec1_P; 5102 to = pcbddc->coarse_vec; 5103 } 5104 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5105 PetscFunctionReturn(0); 5106 } 5107 5108 #undef __FUNCT__ 5109 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 5110 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5111 { 5112 PetscErrorCode ierr; 5113 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5114 PetscScalar *array; 5115 Vec from,to; 5116 5117 PetscFunctionBegin; 5118 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5119 from = pcbddc->coarse_vec; 5120 to = pcbddc->vec1_P; 5121 } else { /* from local to global -> put data in coarse right hand side */ 5122 from = pcbddc->vec1_P; 5123 to = pcbddc->coarse_vec; 5124 } 5125 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5126 if (smode == SCATTER_FORWARD) { 5127 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5128 Vec tvec; 5129 5130 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5131 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5132 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5133 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5134 } 5135 } else { 5136 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5137 ierr = VecResetArray(from);CHKERRQ(ierr); 5138 } 5139 } 5140 PetscFunctionReturn(0); 5141 } 5142 5143 /* uncomment for testing purposes */ 5144 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5145 #undef __FUNCT__ 5146 #define __FUNCT__ "PCBDDCConstraintsSetUp" 5147 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5148 { 5149 PetscErrorCode ierr; 5150 PC_IS* pcis = (PC_IS*)(pc->data); 5151 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5152 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5153 /* one and zero */ 5154 PetscScalar one=1.0,zero=0.0; 5155 /* space to store constraints and their local indices */ 5156 PetscScalar *constraints_data; 5157 PetscInt *constraints_idxs,*constraints_idxs_B; 5158 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5159 PetscInt *constraints_n; 5160 /* iterators */ 5161 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5162 /* BLAS integers */ 5163 PetscBLASInt lwork,lierr; 5164 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5165 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5166 /* reuse */ 5167 PetscInt olocal_primal_size,olocal_primal_size_cc; 5168 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5169 /* change of basis */ 5170 PetscBool qr_needed; 5171 PetscBT change_basis,qr_needed_idx; 5172 /* auxiliary stuff */ 5173 PetscInt *nnz,*is_indices; 5174 PetscInt ncc; 5175 /* some quantities */ 5176 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5177 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5178 5179 PetscFunctionBegin; 5180 /* Destroy Mat objects computed previously */ 5181 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5182 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5183 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5184 /* save info on constraints from previous setup (if any) */ 5185 olocal_primal_size = pcbddc->local_primal_size; 5186 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5187 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5188 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5189 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5190 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5191 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5192 5193 if (!pcbddc->adaptive_selection) { 5194 IS ISForVertices,*ISForFaces,*ISForEdges; 5195 MatNullSpace nearnullsp; 5196 const Vec *nearnullvecs; 5197 Vec *localnearnullsp; 5198 PetscScalar *array; 5199 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5200 PetscBool nnsp_has_cnst; 5201 /* LAPACK working arrays for SVD or POD */ 5202 PetscBool skip_lapack,boolforchange; 5203 PetscScalar *work; 5204 PetscReal *singular_vals; 5205 #if defined(PETSC_USE_COMPLEX) 5206 PetscReal *rwork; 5207 #endif 5208 #if defined(PETSC_MISSING_LAPACK_GESVD) 5209 PetscScalar *temp_basis,*correlation_mat; 5210 #else 5211 PetscBLASInt dummy_int=1; 5212 PetscScalar dummy_scalar=1.; 5213 #endif 5214 5215 /* Get index sets for faces, edges and vertices from graph */ 5216 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5217 /* print some info */ 5218 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5219 PetscInt nv; 5220 5221 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5222 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5223 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5224 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5225 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5226 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5227 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5228 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5229 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5230 } 5231 5232 /* free unneeded index sets */ 5233 if (!pcbddc->use_vertices) { 5234 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5235 } 5236 if (!pcbddc->use_edges) { 5237 for (i=0;i<n_ISForEdges;i++) { 5238 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5239 } 5240 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5241 n_ISForEdges = 0; 5242 } 5243 if (!pcbddc->use_faces) { 5244 for (i=0;i<n_ISForFaces;i++) { 5245 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5246 } 5247 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5248 n_ISForFaces = 0; 5249 } 5250 5251 /* check if near null space is attached to global mat */ 5252 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5253 if (nearnullsp) { 5254 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5255 /* remove any stored info */ 5256 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5257 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5258 /* store information for BDDC solver reuse */ 5259 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5260 pcbddc->onearnullspace = nearnullsp; 5261 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5262 for (i=0;i<nnsp_size;i++) { 5263 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5264 } 5265 } else { /* if near null space is not provided BDDC uses constants by default */ 5266 nnsp_size = 0; 5267 nnsp_has_cnst = PETSC_TRUE; 5268 } 5269 /* get max number of constraints on a single cc */ 5270 max_constraints = nnsp_size; 5271 if (nnsp_has_cnst) max_constraints++; 5272 5273 /* 5274 Evaluate maximum storage size needed by the procedure 5275 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5276 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5277 There can be multiple constraints per connected component 5278 */ 5279 n_vertices = 0; 5280 if (ISForVertices) { 5281 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5282 } 5283 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5284 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5285 5286 total_counts = n_ISForFaces+n_ISForEdges; 5287 total_counts *= max_constraints; 5288 total_counts += n_vertices; 5289 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5290 5291 total_counts = 0; 5292 max_size_of_constraint = 0; 5293 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5294 IS used_is; 5295 if (i<n_ISForEdges) { 5296 used_is = ISForEdges[i]; 5297 } else { 5298 used_is = ISForFaces[i-n_ISForEdges]; 5299 } 5300 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5301 total_counts += j; 5302 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5303 } 5304 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5305 5306 /* get local part of global near null space vectors */ 5307 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5308 for (k=0;k<nnsp_size;k++) { 5309 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5310 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5311 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5312 } 5313 5314 /* whether or not to skip lapack calls */ 5315 skip_lapack = PETSC_TRUE; 5316 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5317 5318 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5319 if (!skip_lapack) { 5320 PetscScalar temp_work; 5321 5322 #if defined(PETSC_MISSING_LAPACK_GESVD) 5323 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5324 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5325 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5326 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5327 #if defined(PETSC_USE_COMPLEX) 5328 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5329 #endif 5330 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5331 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5332 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5333 lwork = -1; 5334 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5335 #if !defined(PETSC_USE_COMPLEX) 5336 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5337 #else 5338 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5339 #endif 5340 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5341 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5342 #else /* on missing GESVD */ 5343 /* SVD */ 5344 PetscInt max_n,min_n; 5345 max_n = max_size_of_constraint; 5346 min_n = max_constraints; 5347 if (max_size_of_constraint < max_constraints) { 5348 min_n = max_size_of_constraint; 5349 max_n = max_constraints; 5350 } 5351 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5352 #if defined(PETSC_USE_COMPLEX) 5353 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5354 #endif 5355 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5356 lwork = -1; 5357 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5358 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5359 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5360 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5361 #if !defined(PETSC_USE_COMPLEX) 5362 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5363 #else 5364 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5365 #endif 5366 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5367 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5368 #endif /* on missing GESVD */ 5369 /* Allocate optimal workspace */ 5370 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5371 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5372 } 5373 /* Now we can loop on constraining sets */ 5374 total_counts = 0; 5375 constraints_idxs_ptr[0] = 0; 5376 constraints_data_ptr[0] = 0; 5377 /* vertices */ 5378 if (n_vertices) { 5379 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5380 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5381 for (i=0;i<n_vertices;i++) { 5382 constraints_n[total_counts] = 1; 5383 constraints_data[total_counts] = 1.0; 5384 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5385 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5386 total_counts++; 5387 } 5388 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5389 n_vertices = total_counts; 5390 } 5391 5392 /* edges and faces */ 5393 total_counts_cc = total_counts; 5394 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5395 IS used_is; 5396 PetscBool idxs_copied = PETSC_FALSE; 5397 5398 if (ncc<n_ISForEdges) { 5399 used_is = ISForEdges[ncc]; 5400 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5401 } else { 5402 used_is = ISForFaces[ncc-n_ISForEdges]; 5403 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5404 } 5405 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5406 5407 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5408 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5409 /* change of basis should not be performed on local periodic nodes */ 5410 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5411 if (nnsp_has_cnst) { 5412 PetscScalar quad_value; 5413 5414 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5415 idxs_copied = PETSC_TRUE; 5416 5417 if (!pcbddc->use_nnsp_true) { 5418 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5419 } else { 5420 quad_value = 1.0; 5421 } 5422 for (j=0;j<size_of_constraint;j++) { 5423 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5424 } 5425 temp_constraints++; 5426 total_counts++; 5427 } 5428 for (k=0;k<nnsp_size;k++) { 5429 PetscReal real_value; 5430 PetscScalar *ptr_to_data; 5431 5432 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5433 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5434 for (j=0;j<size_of_constraint;j++) { 5435 ptr_to_data[j] = array[is_indices[j]]; 5436 } 5437 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5438 /* check if array is null on the connected component */ 5439 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5440 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5441 if (real_value > 0.0) { /* keep indices and values */ 5442 temp_constraints++; 5443 total_counts++; 5444 if (!idxs_copied) { 5445 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5446 idxs_copied = PETSC_TRUE; 5447 } 5448 } 5449 } 5450 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5451 valid_constraints = temp_constraints; 5452 if (!pcbddc->use_nnsp_true && temp_constraints) { 5453 if (temp_constraints == 1) { /* just normalize the constraint */ 5454 PetscScalar norm,*ptr_to_data; 5455 5456 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5457 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5458 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5459 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5460 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5461 } else { /* perform SVD */ 5462 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5463 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5464 5465 #if defined(PETSC_MISSING_LAPACK_GESVD) 5466 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5467 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5468 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5469 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5470 from that computed using LAPACKgesvd 5471 -> This is due to a different computation of eigenvectors in LAPACKheev 5472 -> The quality of the POD-computed basis will be the same */ 5473 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5474 /* Store upper triangular part of correlation matrix */ 5475 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5476 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5477 for (j=0;j<temp_constraints;j++) { 5478 for (k=0;k<j+1;k++) { 5479 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5480 } 5481 } 5482 /* compute eigenvalues and eigenvectors of correlation matrix */ 5483 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5484 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5485 #if !defined(PETSC_USE_COMPLEX) 5486 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5487 #else 5488 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5489 #endif 5490 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5491 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5492 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5493 j = 0; 5494 while (j < temp_constraints && singular_vals[j] < tol) j++; 5495 total_counts = total_counts-j; 5496 valid_constraints = temp_constraints-j; 5497 /* scale and copy POD basis into used quadrature memory */ 5498 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5499 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5500 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5501 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5502 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5503 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5504 if (j<temp_constraints) { 5505 PetscInt ii; 5506 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5507 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5508 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5509 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5510 for (k=0;k<temp_constraints-j;k++) { 5511 for (ii=0;ii<size_of_constraint;ii++) { 5512 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5513 } 5514 } 5515 } 5516 #else /* on missing GESVD */ 5517 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5518 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5519 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5520 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5521 #if !defined(PETSC_USE_COMPLEX) 5522 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5523 #else 5524 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5525 #endif 5526 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5527 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5528 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5529 k = temp_constraints; 5530 if (k > size_of_constraint) k = size_of_constraint; 5531 j = 0; 5532 while (j < k && singular_vals[k-j-1] < tol) j++; 5533 valid_constraints = k-j; 5534 total_counts = total_counts-temp_constraints+valid_constraints; 5535 #endif /* on missing GESVD */ 5536 } 5537 } 5538 /* update pointers information */ 5539 if (valid_constraints) { 5540 constraints_n[total_counts_cc] = valid_constraints; 5541 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5542 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5543 /* set change_of_basis flag */ 5544 if (boolforchange) { 5545 PetscBTSet(change_basis,total_counts_cc); 5546 } 5547 total_counts_cc++; 5548 } 5549 } 5550 /* free workspace */ 5551 if (!skip_lapack) { 5552 ierr = PetscFree(work);CHKERRQ(ierr); 5553 #if defined(PETSC_USE_COMPLEX) 5554 ierr = PetscFree(rwork);CHKERRQ(ierr); 5555 #endif 5556 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5557 #if defined(PETSC_MISSING_LAPACK_GESVD) 5558 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5559 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5560 #endif 5561 } 5562 for (k=0;k<nnsp_size;k++) { 5563 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5564 } 5565 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5566 /* free index sets of faces, edges and vertices */ 5567 for (i=0;i<n_ISForFaces;i++) { 5568 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5569 } 5570 if (n_ISForFaces) { 5571 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5572 } 5573 for (i=0;i<n_ISForEdges;i++) { 5574 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5575 } 5576 if (n_ISForEdges) { 5577 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5578 } 5579 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5580 } else { 5581 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5582 5583 total_counts = 0; 5584 n_vertices = 0; 5585 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5586 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5587 } 5588 max_constraints = 0; 5589 total_counts_cc = 0; 5590 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5591 total_counts += pcbddc->adaptive_constraints_n[i]; 5592 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5593 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5594 } 5595 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5596 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5597 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5598 constraints_data = pcbddc->adaptive_constraints_data; 5599 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5600 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5601 total_counts_cc = 0; 5602 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5603 if (pcbddc->adaptive_constraints_n[i]) { 5604 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5605 } 5606 } 5607 #if 0 5608 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5609 for (i=0;i<total_counts_cc;i++) { 5610 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5611 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5612 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5613 printf(" %d",constraints_idxs[j]); 5614 } 5615 printf("\n"); 5616 printf("number of cc: %d\n",constraints_n[i]); 5617 } 5618 for (i=0;i<n_vertices;i++) { 5619 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5620 } 5621 for (i=0;i<sub_schurs->n_subs;i++) { 5622 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5623 } 5624 #endif 5625 5626 max_size_of_constraint = 0; 5627 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5628 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5629 /* Change of basis */ 5630 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5631 if (pcbddc->use_change_of_basis) { 5632 for (i=0;i<sub_schurs->n_subs;i++) { 5633 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5634 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5635 } 5636 } 5637 } 5638 } 5639 pcbddc->local_primal_size = total_counts; 5640 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5641 5642 /* map constraints_idxs in boundary numbering */ 5643 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5644 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5645 5646 /* Create constraint matrix */ 5647 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5648 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5649 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5650 5651 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5652 /* determine if a QR strategy is needed for change of basis */ 5653 qr_needed = PETSC_FALSE; 5654 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5655 total_primal_vertices=0; 5656 pcbddc->local_primal_size_cc = 0; 5657 for (i=0;i<total_counts_cc;i++) { 5658 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5659 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5660 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5661 pcbddc->local_primal_size_cc += 1; 5662 } else if (PetscBTLookup(change_basis,i)) { 5663 for (k=0;k<constraints_n[i];k++) { 5664 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5665 } 5666 pcbddc->local_primal_size_cc += constraints_n[i]; 5667 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5668 PetscBTSet(qr_needed_idx,i); 5669 qr_needed = PETSC_TRUE; 5670 } 5671 } else { 5672 pcbddc->local_primal_size_cc += 1; 5673 } 5674 } 5675 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5676 pcbddc->n_vertices = total_primal_vertices; 5677 /* permute indices in order to have a sorted set of vertices */ 5678 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5679 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5680 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5681 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5682 5683 /* nonzero structure of constraint matrix */ 5684 /* and get reference dof for local constraints */ 5685 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5686 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5687 5688 j = total_primal_vertices; 5689 total_counts = total_primal_vertices; 5690 cum = total_primal_vertices; 5691 for (i=n_vertices;i<total_counts_cc;i++) { 5692 if (!PetscBTLookup(change_basis,i)) { 5693 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5694 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5695 cum++; 5696 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5697 for (k=0;k<constraints_n[i];k++) { 5698 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5699 nnz[j+k] = size_of_constraint; 5700 } 5701 j += constraints_n[i]; 5702 } 5703 } 5704 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5705 ierr = PetscFree(nnz);CHKERRQ(ierr); 5706 5707 /* set values in constraint matrix */ 5708 for (i=0;i<total_primal_vertices;i++) { 5709 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5710 } 5711 total_counts = total_primal_vertices; 5712 for (i=n_vertices;i<total_counts_cc;i++) { 5713 if (!PetscBTLookup(change_basis,i)) { 5714 PetscInt *cols; 5715 5716 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5717 cols = constraints_idxs+constraints_idxs_ptr[i]; 5718 for (k=0;k<constraints_n[i];k++) { 5719 PetscInt row = total_counts+k; 5720 PetscScalar *vals; 5721 5722 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5723 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5724 } 5725 total_counts += constraints_n[i]; 5726 } 5727 } 5728 /* assembling */ 5729 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5730 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5731 5732 /* 5733 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5734 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5735 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5736 */ 5737 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5738 if (pcbddc->use_change_of_basis) { 5739 /* dual and primal dofs on a single cc */ 5740 PetscInt dual_dofs,primal_dofs; 5741 /* working stuff for GEQRF */ 5742 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5743 PetscBLASInt lqr_work; 5744 /* working stuff for UNGQR */ 5745 PetscScalar *gqr_work,lgqr_work_t; 5746 PetscBLASInt lgqr_work; 5747 /* working stuff for TRTRS */ 5748 PetscScalar *trs_rhs; 5749 PetscBLASInt Blas_NRHS; 5750 /* pointers for values insertion into change of basis matrix */ 5751 PetscInt *start_rows,*start_cols; 5752 PetscScalar *start_vals; 5753 /* working stuff for values insertion */ 5754 PetscBT is_primal; 5755 PetscInt *aux_primal_numbering_B; 5756 /* matrix sizes */ 5757 PetscInt global_size,local_size; 5758 /* temporary change of basis */ 5759 Mat localChangeOfBasisMatrix; 5760 /* extra space for debugging */ 5761 PetscScalar *dbg_work; 5762 5763 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5764 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5765 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5766 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5767 /* nonzeros for local mat */ 5768 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5769 if (!pcbddc->benign_change || pcbddc->fake_change) { 5770 for (i=0;i<pcis->n;i++) nnz[i]=1; 5771 } else { 5772 const PetscInt *ii; 5773 PetscInt n; 5774 PetscBool flg_row; 5775 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5776 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5777 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5778 } 5779 for (i=n_vertices;i<total_counts_cc;i++) { 5780 if (PetscBTLookup(change_basis,i)) { 5781 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5782 if (PetscBTLookup(qr_needed_idx,i)) { 5783 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5784 } else { 5785 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5786 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5787 } 5788 } 5789 } 5790 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5791 ierr = PetscFree(nnz);CHKERRQ(ierr); 5792 /* Set interior change in the matrix */ 5793 if (!pcbddc->benign_change || pcbddc->fake_change) { 5794 for (i=0;i<pcis->n;i++) { 5795 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5796 } 5797 } else { 5798 const PetscInt *ii,*jj; 5799 PetscScalar *aa; 5800 PetscInt n; 5801 PetscBool flg_row; 5802 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5803 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5804 for (i=0;i<n;i++) { 5805 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5806 } 5807 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5808 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5809 } 5810 5811 if (pcbddc->dbg_flag) { 5812 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5813 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5814 } 5815 5816 5817 /* Now we loop on the constraints which need a change of basis */ 5818 /* 5819 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5820 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5821 5822 Basic blocks of change of basis matrix T computed by 5823 5824 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5825 5826 | 1 0 ... 0 s_1/S | 5827 | 0 1 ... 0 s_2/S | 5828 | ... | 5829 | 0 ... 1 s_{n-1}/S | 5830 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5831 5832 with S = \sum_{i=1}^n s_i^2 5833 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5834 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5835 5836 - QR decomposition of constraints otherwise 5837 */ 5838 if (qr_needed) { 5839 /* space to store Q */ 5840 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5841 /* array to store scaling factors for reflectors */ 5842 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5843 /* first we issue queries for optimal work */ 5844 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5845 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5846 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5847 lqr_work = -1; 5848 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5849 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5850 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5851 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5852 lgqr_work = -1; 5853 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5854 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5855 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5856 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5857 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5858 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5859 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5860 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5861 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5862 /* array to store rhs and solution of triangular solver */ 5863 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5864 /* allocating workspace for check */ 5865 if (pcbddc->dbg_flag) { 5866 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5867 } 5868 } 5869 /* array to store whether a node is primal or not */ 5870 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5871 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5872 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5873 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5874 for (i=0;i<total_primal_vertices;i++) { 5875 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5876 } 5877 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5878 5879 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5880 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5881 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5882 if (PetscBTLookup(change_basis,total_counts)) { 5883 /* get constraint info */ 5884 primal_dofs = constraints_n[total_counts]; 5885 dual_dofs = size_of_constraint-primal_dofs; 5886 5887 if (pcbddc->dbg_flag) { 5888 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5889 } 5890 5891 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5892 5893 /* copy quadrature constraints for change of basis check */ 5894 if (pcbddc->dbg_flag) { 5895 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5896 } 5897 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5898 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5899 5900 /* compute QR decomposition of constraints */ 5901 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5902 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5903 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5904 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5905 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5906 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5907 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5908 5909 /* explictly compute R^-T */ 5910 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5911 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5912 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5913 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5914 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5915 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5916 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5917 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5918 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5919 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5920 5921 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5922 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5923 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5924 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5925 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5926 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5927 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 5928 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 5929 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5930 5931 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 5932 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 5933 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 5934 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5935 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5936 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5937 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5938 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5939 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5940 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5941 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 5942 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5943 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5944 5945 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 5946 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 5947 /* insert cols for primal dofs */ 5948 for (j=0;j<primal_dofs;j++) { 5949 start_vals = &qr_basis[j*size_of_constraint]; 5950 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5951 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5952 } 5953 /* insert cols for dual dofs */ 5954 for (j=0,k=0;j<dual_dofs;k++) { 5955 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 5956 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 5957 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5958 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5959 j++; 5960 } 5961 } 5962 5963 /* check change of basis */ 5964 if (pcbddc->dbg_flag) { 5965 PetscInt ii,jj; 5966 PetscBool valid_qr=PETSC_TRUE; 5967 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 5968 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5969 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 5970 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5971 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 5972 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 5973 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5974 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 5975 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5976 for (jj=0;jj<size_of_constraint;jj++) { 5977 for (ii=0;ii<primal_dofs;ii++) { 5978 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 5979 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 5980 } 5981 } 5982 if (!valid_qr) { 5983 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 5984 for (jj=0;jj<size_of_constraint;jj++) { 5985 for (ii=0;ii<primal_dofs;ii++) { 5986 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 5987 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5988 } 5989 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 5990 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5991 } 5992 } 5993 } 5994 } else { 5995 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 5996 } 5997 } 5998 } else { /* simple transformation block */ 5999 PetscInt row,col; 6000 PetscScalar val,norm; 6001 6002 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6003 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6004 for (j=0;j<size_of_constraint;j++) { 6005 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6006 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6007 if (!PetscBTLookup(is_primal,row_B)) { 6008 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6009 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6010 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6011 } else { 6012 for (k=0;k<size_of_constraint;k++) { 6013 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6014 if (row != col) { 6015 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6016 } else { 6017 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6018 } 6019 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6020 } 6021 } 6022 } 6023 if (pcbddc->dbg_flag) { 6024 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6025 } 6026 } 6027 } else { 6028 if (pcbddc->dbg_flag) { 6029 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6030 } 6031 } 6032 } 6033 6034 /* free workspace */ 6035 if (qr_needed) { 6036 if (pcbddc->dbg_flag) { 6037 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6038 } 6039 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6040 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6041 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6042 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6043 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6044 } 6045 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6046 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6047 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6048 6049 /* assembling of global change of variable */ 6050 if (!pcbddc->fake_change) { 6051 Mat tmat; 6052 PetscInt bs; 6053 6054 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6055 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6056 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6057 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6058 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6059 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6060 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6061 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6062 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6063 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6064 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6065 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6066 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6067 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6068 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6069 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6070 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6071 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6072 6073 /* check */ 6074 if (pcbddc->dbg_flag) { 6075 PetscReal error; 6076 Vec x,x_change; 6077 6078 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6079 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6080 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6081 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6082 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6083 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6084 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6085 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6086 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6087 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6088 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6089 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6090 if (error > PETSC_SMALL) { 6091 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6092 } 6093 ierr = VecDestroy(&x);CHKERRQ(ierr); 6094 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6095 } 6096 /* adapt sub_schurs computed (if any) */ 6097 if (pcbddc->use_deluxe_scaling) { 6098 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6099 6100 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 6101 if (sub_schurs && sub_schurs->S_Ej_all) { 6102 Mat S_new,tmat; 6103 IS is_all_N,is_V_Sall = NULL; 6104 6105 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6106 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6107 if (pcbddc->deluxe_zerorows) { 6108 ISLocalToGlobalMapping NtoSall; 6109 IS is_V; 6110 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6111 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6112 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6113 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6114 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6115 } 6116 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6117 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6118 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6119 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6120 if (pcbddc->deluxe_zerorows) { 6121 const PetscScalar *array; 6122 const PetscInt *idxs_V,*idxs_all; 6123 PetscInt i,n_V; 6124 6125 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6126 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6127 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6128 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6129 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6130 for (i=0;i<n_V;i++) { 6131 PetscScalar val; 6132 PetscInt idx; 6133 6134 idx = idxs_V[i]; 6135 val = array[idxs_all[idxs_V[i]]]; 6136 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6137 } 6138 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6139 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6140 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6141 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6142 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6143 } 6144 sub_schurs->S_Ej_all = S_new; 6145 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6146 if (sub_schurs->sum_S_Ej_all) { 6147 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6148 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6149 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6150 if (pcbddc->deluxe_zerorows) { 6151 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6152 } 6153 sub_schurs->sum_S_Ej_all = S_new; 6154 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6155 } 6156 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6157 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6158 } 6159 /* destroy any change of basis context in sub_schurs */ 6160 if (sub_schurs && sub_schurs->change) { 6161 PetscInt i; 6162 6163 for (i=0;i<sub_schurs->n_subs;i++) { 6164 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6165 } 6166 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6167 } 6168 } 6169 if (pcbddc->switch_static) { /* need to save the local change */ 6170 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6171 } else { 6172 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6173 } 6174 /* determine if any process has changed the pressures locally */ 6175 pcbddc->change_interior = pcbddc->benign_have_null; 6176 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6177 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6178 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6179 pcbddc->use_qr_single = qr_needed; 6180 } 6181 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6182 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6183 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6184 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6185 } else { 6186 Mat benign_global = NULL; 6187 if (pcbddc->benign_have_null) { 6188 Mat tmat; 6189 6190 pcbddc->change_interior = PETSC_TRUE; 6191 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6192 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6193 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6194 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6195 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6196 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6197 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6198 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6199 if (pcbddc->benign_change) { 6200 Mat M; 6201 6202 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6203 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6204 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6205 ierr = MatDestroy(&M);CHKERRQ(ierr); 6206 } else { 6207 Mat eye; 6208 PetscScalar *array; 6209 6210 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6211 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6212 for (i=0;i<pcis->n;i++) { 6213 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6214 } 6215 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6216 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6217 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6218 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6219 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6220 } 6221 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6222 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6223 } 6224 if (pcbddc->user_ChangeOfBasisMatrix) { 6225 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6226 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6227 } else if (pcbddc->benign_have_null) { 6228 pcbddc->ChangeOfBasisMatrix = benign_global; 6229 } 6230 } 6231 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6232 IS is_global; 6233 const PetscInt *gidxs; 6234 6235 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6236 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6237 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6238 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6239 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6240 } 6241 } 6242 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6243 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6244 } 6245 6246 if (!pcbddc->fake_change) { 6247 /* add pressure dofs to set of primal nodes for numbering purposes */ 6248 for (i=0;i<pcbddc->benign_n;i++) { 6249 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6250 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6251 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6252 pcbddc->local_primal_size_cc++; 6253 pcbddc->local_primal_size++; 6254 } 6255 6256 /* check if a new primal space has been introduced (also take into account benign trick) */ 6257 pcbddc->new_primal_space_local = PETSC_TRUE; 6258 if (olocal_primal_size == pcbddc->local_primal_size) { 6259 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6260 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6261 if (!pcbddc->new_primal_space_local) { 6262 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6263 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6264 } 6265 } 6266 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6267 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6268 } 6269 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6270 6271 /* flush dbg viewer */ 6272 if (pcbddc->dbg_flag) { 6273 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6274 } 6275 6276 /* free workspace */ 6277 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6278 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6279 if (!pcbddc->adaptive_selection) { 6280 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6281 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6282 } else { 6283 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6284 pcbddc->adaptive_constraints_idxs_ptr, 6285 pcbddc->adaptive_constraints_data_ptr, 6286 pcbddc->adaptive_constraints_idxs, 6287 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6288 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6289 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6290 } 6291 PetscFunctionReturn(0); 6292 } 6293 6294 #undef __FUNCT__ 6295 #define __FUNCT__ "PCBDDCAnalyzeInterface" 6296 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6297 { 6298 ISLocalToGlobalMapping map; 6299 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6300 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6301 PetscInt ierr,i,N; 6302 6303 PetscFunctionBegin; 6304 if (pcbddc->recompute_topography) { 6305 pcbddc->graphanalyzed = PETSC_FALSE; 6306 /* Reset previously computed graph */ 6307 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6308 /* Init local Graph struct */ 6309 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6310 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6311 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6312 6313 /* Check validity of the csr graph passed in by the user */ 6314 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6315 6316 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6317 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 6318 PetscInt *xadj,*adjncy; 6319 PetscInt nvtxs; 6320 PetscBool flg_row=PETSC_FALSE; 6321 6322 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6323 if (flg_row) { 6324 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6325 pcbddc->computed_rowadj = PETSC_TRUE; 6326 } 6327 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6328 } 6329 if (pcbddc->dbg_flag) { 6330 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6331 } 6332 6333 /* Setup of Graph */ 6334 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6335 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6336 6337 /* attach info on disconnected subdomains if present */ 6338 if (pcbddc->n_local_subs) { 6339 PetscInt *local_subs; 6340 6341 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6342 for (i=0;i<pcbddc->n_local_subs;i++) { 6343 const PetscInt *idxs; 6344 PetscInt nl,j; 6345 6346 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6347 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6348 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6349 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6350 } 6351 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6352 pcbddc->mat_graph->local_subs = local_subs; 6353 } 6354 } 6355 6356 if (!pcbddc->graphanalyzed) { 6357 /* Graph's connected components analysis */ 6358 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6359 pcbddc->graphanalyzed = PETSC_TRUE; 6360 } 6361 PetscFunctionReturn(0); 6362 } 6363 6364 #undef __FUNCT__ 6365 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 6366 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6367 { 6368 PetscInt i,j; 6369 PetscScalar *alphas; 6370 PetscErrorCode ierr; 6371 6372 PetscFunctionBegin; 6373 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6374 for (i=0;i<n;i++) { 6375 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6376 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6377 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6378 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6379 } 6380 ierr = PetscFree(alphas);CHKERRQ(ierr); 6381 PetscFunctionReturn(0); 6382 } 6383 6384 #undef __FUNCT__ 6385 #define __FUNCT__ "PCBDDCMatISGetSubassemblingPattern" 6386 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6387 { 6388 Mat A; 6389 PetscInt n_neighs,*neighs,*n_shared,**shared; 6390 PetscMPIInt size,rank,color; 6391 PetscInt *xadj,*adjncy; 6392 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6393 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 6394 PetscInt void_procs,*procs_candidates = NULL; 6395 PetscInt xadj_count,*count; 6396 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6397 PetscSubcomm psubcomm; 6398 MPI_Comm subcomm; 6399 PetscErrorCode ierr; 6400 6401 PetscFunctionBegin; 6402 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6403 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6404 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6405 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6406 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6407 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6408 6409 if (have_void) *have_void = PETSC_FALSE; 6410 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6411 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6412 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6413 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6414 im_active = !!n; 6415 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6416 void_procs = size - active_procs; 6417 /* get ranks of of non-active processes in mat communicator */ 6418 if (void_procs) { 6419 PetscInt ncand; 6420 6421 if (have_void) *have_void = PETSC_TRUE; 6422 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6423 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6424 for (i=0,ncand=0;i<size;i++) { 6425 if (!procs_candidates[i]) { 6426 procs_candidates[ncand++] = i; 6427 } 6428 } 6429 /* force n_subdomains to be not greater that the number of non-active processes */ 6430 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6431 } 6432 6433 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 6434 number of subdomains requested 1 -> send to master or first candidate in voids */ 6435 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 6436 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 6437 PetscInt issize,isidx,dest; 6438 if (*n_subdomains == 1) dest = 0; 6439 else dest = rank; 6440 if (im_active) { 6441 issize = 1; 6442 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6443 isidx = procs_candidates[dest]; 6444 } else { 6445 isidx = dest; 6446 } 6447 } else { 6448 issize = 0; 6449 isidx = -1; 6450 } 6451 if (*n_subdomains != 1) *n_subdomains = active_procs; 6452 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6453 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6454 PetscFunctionReturn(0); 6455 } 6456 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6457 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6458 threshold = PetscMax(threshold,2); 6459 6460 /* Get info on mapping */ 6461 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6462 6463 /* build local CSR graph of subdomains' connectivity */ 6464 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6465 xadj[0] = 0; 6466 xadj[1] = PetscMax(n_neighs-1,0); 6467 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6468 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6469 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 6470 for (i=1;i<n_neighs;i++) 6471 for (j=0;j<n_shared[i];j++) 6472 count[shared[i][j]] += 1; 6473 6474 xadj_count = 0; 6475 for (i=1;i<n_neighs;i++) { 6476 for (j=0;j<n_shared[i];j++) { 6477 if (count[shared[i][j]] < threshold) { 6478 adjncy[xadj_count] = neighs[i]; 6479 adjncy_wgt[xadj_count] = n_shared[i]; 6480 xadj_count++; 6481 break; 6482 } 6483 } 6484 } 6485 xadj[1] = xadj_count; 6486 ierr = PetscFree(count);CHKERRQ(ierr); 6487 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6488 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6489 6490 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6491 6492 /* Restrict work on active processes only */ 6493 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6494 if (void_procs) { 6495 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6496 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6497 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6498 subcomm = PetscSubcommChild(psubcomm); 6499 } else { 6500 psubcomm = NULL; 6501 subcomm = PetscObjectComm((PetscObject)mat); 6502 } 6503 6504 v_wgt = NULL; 6505 if (!color) { 6506 ierr = PetscFree(xadj);CHKERRQ(ierr); 6507 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6508 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6509 } else { 6510 Mat subdomain_adj; 6511 IS new_ranks,new_ranks_contig; 6512 MatPartitioning partitioner; 6513 PetscInt rstart=0,rend=0; 6514 PetscInt *is_indices,*oldranks; 6515 PetscMPIInt size; 6516 PetscBool aggregate; 6517 6518 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6519 if (void_procs) { 6520 PetscInt prank = rank; 6521 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6522 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6523 for (i=0;i<xadj[1];i++) { 6524 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6525 } 6526 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6527 } else { 6528 oldranks = NULL; 6529 } 6530 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6531 if (aggregate) { /* TODO: all this part could be made more efficient */ 6532 PetscInt lrows,row,ncols,*cols; 6533 PetscMPIInt nrank; 6534 PetscScalar *vals; 6535 6536 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6537 lrows = 0; 6538 if (nrank<redprocs) { 6539 lrows = size/redprocs; 6540 if (nrank<size%redprocs) lrows++; 6541 } 6542 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6543 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6544 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6545 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6546 row = nrank; 6547 ncols = xadj[1]-xadj[0]; 6548 cols = adjncy; 6549 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6550 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6551 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6552 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6553 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6554 ierr = PetscFree(xadj);CHKERRQ(ierr); 6555 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6556 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6557 ierr = PetscFree(vals);CHKERRQ(ierr); 6558 if (use_vwgt) { 6559 Vec v; 6560 const PetscScalar *array; 6561 PetscInt nl; 6562 6563 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6564 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 6565 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6566 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6567 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6568 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6569 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6570 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6571 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6572 ierr = VecDestroy(&v);CHKERRQ(ierr); 6573 } 6574 } else { 6575 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6576 if (use_vwgt) { 6577 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6578 v_wgt[0] = n; 6579 } 6580 } 6581 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6582 6583 /* Partition */ 6584 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6585 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6586 if (v_wgt) { 6587 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6588 } 6589 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6590 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6591 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6592 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6593 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6594 6595 /* renumber new_ranks to avoid "holes" in new set of processors */ 6596 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6597 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6598 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6599 if (!aggregate) { 6600 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6601 #if defined(PETSC_USE_DEBUG) 6602 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6603 #endif 6604 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6605 } else if (oldranks) { 6606 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6607 } else { 6608 ranks_send_to_idx[0] = is_indices[0]; 6609 } 6610 } else { 6611 PetscInt idxs[1]; 6612 PetscMPIInt tag; 6613 MPI_Request *reqs; 6614 6615 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6616 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6617 for (i=rstart;i<rend;i++) { 6618 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6619 } 6620 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6621 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6622 ierr = PetscFree(reqs);CHKERRQ(ierr); 6623 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6624 #if defined(PETSC_USE_DEBUG) 6625 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6626 #endif 6627 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6628 } else if (oldranks) { 6629 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6630 } else { 6631 ranks_send_to_idx[0] = idxs[0]; 6632 } 6633 } 6634 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6635 /* clean up */ 6636 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6637 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6638 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6639 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6640 } 6641 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6642 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6643 6644 /* assemble parallel IS for sends */ 6645 i = 1; 6646 if (!color) i=0; 6647 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6648 PetscFunctionReturn(0); 6649 } 6650 6651 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6652 6653 #undef __FUNCT__ 6654 #define __FUNCT__ "PCBDDCMatISSubassemble" 6655 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6656 { 6657 Mat local_mat; 6658 IS is_sends_internal; 6659 PetscInt rows,cols,new_local_rows; 6660 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6661 PetscBool ismatis,isdense,newisdense,destroy_mat; 6662 ISLocalToGlobalMapping l2gmap; 6663 PetscInt* l2gmap_indices; 6664 const PetscInt* is_indices; 6665 MatType new_local_type; 6666 /* buffers */ 6667 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6668 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6669 PetscInt *recv_buffer_idxs_local; 6670 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6671 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6672 /* MPI */ 6673 MPI_Comm comm,comm_n; 6674 PetscSubcomm subcomm; 6675 PetscMPIInt n_sends,n_recvs,commsize; 6676 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6677 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6678 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6679 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6680 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6681 PetscErrorCode ierr; 6682 6683 PetscFunctionBegin; 6684 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6685 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6686 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6687 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6688 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6689 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6690 PetscValidLogicalCollectiveBool(mat,reuse,6); 6691 PetscValidLogicalCollectiveInt(mat,nis,8); 6692 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6693 if (nvecs) { 6694 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6695 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6696 } 6697 /* further checks */ 6698 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6699 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6700 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6701 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6702 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6703 if (reuse && *mat_n) { 6704 PetscInt mrows,mcols,mnrows,mncols; 6705 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6706 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6707 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6708 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6709 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6710 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6711 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6712 } 6713 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6714 PetscValidLogicalCollectiveInt(mat,bs,0); 6715 6716 /* prepare IS for sending if not provided */ 6717 if (!is_sends) { 6718 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6719 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6720 } else { 6721 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6722 is_sends_internal = is_sends; 6723 } 6724 6725 /* get comm */ 6726 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6727 6728 /* compute number of sends */ 6729 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6730 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6731 6732 /* compute number of receives */ 6733 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6734 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6735 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6736 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6737 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6738 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6739 ierr = PetscFree(iflags);CHKERRQ(ierr); 6740 6741 /* restrict comm if requested */ 6742 subcomm = 0; 6743 destroy_mat = PETSC_FALSE; 6744 if (restrict_comm) { 6745 PetscMPIInt color,subcommsize; 6746 6747 color = 0; 6748 if (restrict_full) { 6749 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6750 } else { 6751 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6752 } 6753 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6754 subcommsize = commsize - subcommsize; 6755 /* check if reuse has been requested */ 6756 if (reuse) { 6757 if (*mat_n) { 6758 PetscMPIInt subcommsize2; 6759 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6760 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6761 comm_n = PetscObjectComm((PetscObject)*mat_n); 6762 } else { 6763 comm_n = PETSC_COMM_SELF; 6764 } 6765 } else { /* MAT_INITIAL_MATRIX */ 6766 PetscMPIInt rank; 6767 6768 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6769 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6770 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6771 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6772 comm_n = PetscSubcommChild(subcomm); 6773 } 6774 /* flag to destroy *mat_n if not significative */ 6775 if (color) destroy_mat = PETSC_TRUE; 6776 } else { 6777 comm_n = comm; 6778 } 6779 6780 /* prepare send/receive buffers */ 6781 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6782 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6783 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6784 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6785 if (nis) { 6786 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6787 } 6788 6789 /* Get data from local matrices */ 6790 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6791 /* TODO: See below some guidelines on how to prepare the local buffers */ 6792 /* 6793 send_buffer_vals should contain the raw values of the local matrix 6794 send_buffer_idxs should contain: 6795 - MatType_PRIVATE type 6796 - PetscInt size_of_l2gmap 6797 - PetscInt global_row_indices[size_of_l2gmap] 6798 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6799 */ 6800 else { 6801 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6802 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6803 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6804 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6805 send_buffer_idxs[1] = i; 6806 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6807 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6808 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6809 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6810 for (i=0;i<n_sends;i++) { 6811 ilengths_vals[is_indices[i]] = len*len; 6812 ilengths_idxs[is_indices[i]] = len+2; 6813 } 6814 } 6815 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6816 /* additional is (if any) */ 6817 if (nis) { 6818 PetscMPIInt psum; 6819 PetscInt j; 6820 for (j=0,psum=0;j<nis;j++) { 6821 PetscInt plen; 6822 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6823 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6824 psum += len+1; /* indices + lenght */ 6825 } 6826 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6827 for (j=0,psum=0;j<nis;j++) { 6828 PetscInt plen; 6829 const PetscInt *is_array_idxs; 6830 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6831 send_buffer_idxs_is[psum] = plen; 6832 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6833 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6834 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6835 psum += plen+1; /* indices + lenght */ 6836 } 6837 for (i=0;i<n_sends;i++) { 6838 ilengths_idxs_is[is_indices[i]] = psum; 6839 } 6840 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6841 } 6842 6843 buf_size_idxs = 0; 6844 buf_size_vals = 0; 6845 buf_size_idxs_is = 0; 6846 buf_size_vecs = 0; 6847 for (i=0;i<n_recvs;i++) { 6848 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6849 buf_size_vals += (PetscInt)olengths_vals[i]; 6850 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6851 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6852 } 6853 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6854 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6855 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6856 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6857 6858 /* get new tags for clean communications */ 6859 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6860 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6861 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6862 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6863 6864 /* allocate for requests */ 6865 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6866 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6867 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6868 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6869 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6870 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6871 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6872 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6873 6874 /* communications */ 6875 ptr_idxs = recv_buffer_idxs; 6876 ptr_vals = recv_buffer_vals; 6877 ptr_idxs_is = recv_buffer_idxs_is; 6878 ptr_vecs = recv_buffer_vecs; 6879 for (i=0;i<n_recvs;i++) { 6880 source_dest = onodes[i]; 6881 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6882 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6883 ptr_idxs += olengths_idxs[i]; 6884 ptr_vals += olengths_vals[i]; 6885 if (nis) { 6886 source_dest = onodes_is[i]; 6887 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6888 ptr_idxs_is += olengths_idxs_is[i]; 6889 } 6890 if (nvecs) { 6891 source_dest = onodes[i]; 6892 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6893 ptr_vecs += olengths_idxs[i]-2; 6894 } 6895 } 6896 for (i=0;i<n_sends;i++) { 6897 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6898 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6899 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6900 if (nis) { 6901 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6902 } 6903 if (nvecs) { 6904 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6905 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6906 } 6907 } 6908 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6909 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6910 6911 /* assemble new l2g map */ 6912 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6913 ptr_idxs = recv_buffer_idxs; 6914 new_local_rows = 0; 6915 for (i=0;i<n_recvs;i++) { 6916 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6917 ptr_idxs += olengths_idxs[i]; 6918 } 6919 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6920 ptr_idxs = recv_buffer_idxs; 6921 new_local_rows = 0; 6922 for (i=0;i<n_recvs;i++) { 6923 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6924 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6925 ptr_idxs += olengths_idxs[i]; 6926 } 6927 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 6928 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 6929 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 6930 6931 /* infer new local matrix type from received local matrices type */ 6932 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 6933 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 6934 if (n_recvs) { 6935 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 6936 ptr_idxs = recv_buffer_idxs; 6937 for (i=0;i<n_recvs;i++) { 6938 if ((PetscInt)new_local_type_private != *ptr_idxs) { 6939 new_local_type_private = MATAIJ_PRIVATE; 6940 break; 6941 } 6942 ptr_idxs += olengths_idxs[i]; 6943 } 6944 switch (new_local_type_private) { 6945 case MATDENSE_PRIVATE: 6946 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 6947 new_local_type = MATSEQAIJ; 6948 bs = 1; 6949 } else { /* if I receive only 1 dense matrix */ 6950 new_local_type = MATSEQDENSE; 6951 bs = 1; 6952 } 6953 break; 6954 case MATAIJ_PRIVATE: 6955 new_local_type = MATSEQAIJ; 6956 bs = 1; 6957 break; 6958 case MATBAIJ_PRIVATE: 6959 new_local_type = MATSEQBAIJ; 6960 break; 6961 case MATSBAIJ_PRIVATE: 6962 new_local_type = MATSEQSBAIJ; 6963 break; 6964 default: 6965 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 6966 break; 6967 } 6968 } else { /* by default, new_local_type is seqdense */ 6969 new_local_type = MATSEQDENSE; 6970 bs = 1; 6971 } 6972 6973 /* create MATIS object if needed */ 6974 if (!reuse) { 6975 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 6976 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6977 } else { 6978 /* it also destroys the local matrices */ 6979 if (*mat_n) { 6980 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 6981 } else { /* this is a fake object */ 6982 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6983 } 6984 } 6985 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 6986 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 6987 6988 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6989 6990 /* Global to local map of received indices */ 6991 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 6992 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 6993 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 6994 6995 /* restore attributes -> type of incoming data and its size */ 6996 buf_size_idxs = 0; 6997 for (i=0;i<n_recvs;i++) { 6998 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 6999 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7000 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7001 } 7002 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7003 7004 /* set preallocation */ 7005 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7006 if (!newisdense) { 7007 PetscInt *new_local_nnz=0; 7008 7009 ptr_idxs = recv_buffer_idxs_local; 7010 if (n_recvs) { 7011 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7012 } 7013 for (i=0;i<n_recvs;i++) { 7014 PetscInt j; 7015 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7016 for (j=0;j<*(ptr_idxs+1);j++) { 7017 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7018 } 7019 } else { 7020 /* TODO */ 7021 } 7022 ptr_idxs += olengths_idxs[i]; 7023 } 7024 if (new_local_nnz) { 7025 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7026 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7027 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7028 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7029 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7030 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7031 } else { 7032 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7033 } 7034 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7035 } else { 7036 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7037 } 7038 7039 /* set values */ 7040 ptr_vals = recv_buffer_vals; 7041 ptr_idxs = recv_buffer_idxs_local; 7042 for (i=0;i<n_recvs;i++) { 7043 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7044 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7045 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7046 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7047 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7048 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7049 } else { 7050 /* TODO */ 7051 } 7052 ptr_idxs += olengths_idxs[i]; 7053 ptr_vals += olengths_vals[i]; 7054 } 7055 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7056 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7057 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7058 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7059 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7060 7061 #if 0 7062 if (!restrict_comm) { /* check */ 7063 Vec lvec,rvec; 7064 PetscReal infty_error; 7065 7066 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7067 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7068 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7069 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7070 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7071 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7072 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7073 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7074 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7075 } 7076 #endif 7077 7078 /* assemble new additional is (if any) */ 7079 if (nis) { 7080 PetscInt **temp_idxs,*count_is,j,psum; 7081 7082 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7083 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7084 ptr_idxs = recv_buffer_idxs_is; 7085 psum = 0; 7086 for (i=0;i<n_recvs;i++) { 7087 for (j=0;j<nis;j++) { 7088 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7089 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7090 psum += plen; 7091 ptr_idxs += plen+1; /* shift pointer to received data */ 7092 } 7093 } 7094 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7095 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7096 for (i=1;i<nis;i++) { 7097 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7098 } 7099 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7100 ptr_idxs = recv_buffer_idxs_is; 7101 for (i=0;i<n_recvs;i++) { 7102 for (j=0;j<nis;j++) { 7103 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7104 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7105 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7106 ptr_idxs += plen+1; /* shift pointer to received data */ 7107 } 7108 } 7109 for (i=0;i<nis;i++) { 7110 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7111 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7112 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7113 } 7114 ierr = PetscFree(count_is);CHKERRQ(ierr); 7115 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7116 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7117 } 7118 /* free workspace */ 7119 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7120 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7121 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7122 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7123 if (isdense) { 7124 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7125 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7126 } else { 7127 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7128 } 7129 if (nis) { 7130 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7131 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7132 } 7133 7134 if (nvecs) { 7135 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7136 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7137 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7138 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7139 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7140 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7141 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7142 /* set values */ 7143 ptr_vals = recv_buffer_vecs; 7144 ptr_idxs = recv_buffer_idxs_local; 7145 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7146 for (i=0;i<n_recvs;i++) { 7147 PetscInt j; 7148 for (j=0;j<*(ptr_idxs+1);j++) { 7149 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7150 } 7151 ptr_idxs += olengths_idxs[i]; 7152 ptr_vals += olengths_idxs[i]-2; 7153 } 7154 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7155 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7156 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7157 } 7158 7159 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7160 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7161 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7162 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7163 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7164 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7165 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7166 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7167 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7168 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7169 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7170 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7171 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7172 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7173 ierr = PetscFree(onodes);CHKERRQ(ierr); 7174 if (nis) { 7175 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7176 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7177 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7178 } 7179 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7180 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7181 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7182 for (i=0;i<nis;i++) { 7183 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7184 } 7185 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7186 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7187 } 7188 *mat_n = NULL; 7189 } 7190 PetscFunctionReturn(0); 7191 } 7192 7193 /* temporary hack into ksp private data structure */ 7194 #include <petsc/private/kspimpl.h> 7195 7196 #undef __FUNCT__ 7197 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 7198 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7199 { 7200 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7201 PC_IS *pcis = (PC_IS*)pc->data; 7202 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7203 Mat coarsedivudotp = NULL; 7204 Mat coarseG,t_coarse_mat_is; 7205 MatNullSpace CoarseNullSpace = NULL; 7206 ISLocalToGlobalMapping coarse_islg; 7207 IS coarse_is,*isarray; 7208 PetscInt i,im_active=-1,active_procs=-1; 7209 PetscInt nis,nisdofs,nisneu,nisvert; 7210 PC pc_temp; 7211 PCType coarse_pc_type; 7212 KSPType coarse_ksp_type; 7213 PetscBool multilevel_requested,multilevel_allowed; 7214 PetscBool isredundant,isbddc,isnn,coarse_reuse; 7215 PetscInt ncoarse,nedcfield; 7216 PetscBool compute_vecs = PETSC_FALSE; 7217 PetscScalar *array; 7218 MatReuse coarse_mat_reuse; 7219 PetscBool restr, full_restr, have_void; 7220 PetscErrorCode ierr; 7221 7222 PetscFunctionBegin; 7223 /* Assign global numbering to coarse dofs */ 7224 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7225 PetscInt ocoarse_size; 7226 compute_vecs = PETSC_TRUE; 7227 ocoarse_size = pcbddc->coarse_size; 7228 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7229 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7230 /* see if we can avoid some work */ 7231 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7232 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7233 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7234 PC pc; 7235 PetscBool isbddc; 7236 7237 /* temporary workaround since PCBDDC does not have a reset method so far */ 7238 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 7239 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 7240 if (isbddc) { 7241 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 7242 } else { 7243 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7244 } 7245 coarse_reuse = PETSC_FALSE; 7246 } else { /* we can safely reuse already computed coarse matrix */ 7247 coarse_reuse = PETSC_TRUE; 7248 } 7249 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7250 coarse_reuse = PETSC_FALSE; 7251 } 7252 /* reset any subassembling information */ 7253 if (!coarse_reuse || pcbddc->recompute_topography) { 7254 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7255 } 7256 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7257 coarse_reuse = PETSC_TRUE; 7258 } 7259 /* assemble coarse matrix */ 7260 if (coarse_reuse && pcbddc->coarse_ksp) { 7261 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7262 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7263 coarse_mat_reuse = MAT_REUSE_MATRIX; 7264 } else { 7265 coarse_mat = NULL; 7266 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7267 } 7268 7269 /* creates temporary l2gmap and IS for coarse indexes */ 7270 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7271 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7272 7273 /* creates temporary MATIS object for coarse matrix */ 7274 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7275 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7276 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7277 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7278 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7279 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7280 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7281 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7282 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7283 7284 /* count "active" (i.e. with positive local size) and "void" processes */ 7285 im_active = !!(pcis->n); 7286 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7287 7288 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7289 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7290 /* full_restr : just use the receivers from the subassembling pattern */ 7291 coarse_mat_is = NULL; 7292 multilevel_allowed = PETSC_FALSE; 7293 multilevel_requested = PETSC_FALSE; 7294 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7295 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7296 if (multilevel_requested) { 7297 ncoarse = active_procs/pcbddc->coarsening_ratio; 7298 restr = PETSC_FALSE; 7299 full_restr = PETSC_FALSE; 7300 } else { 7301 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7302 restr = PETSC_TRUE; 7303 full_restr = PETSC_TRUE; 7304 } 7305 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7306 ncoarse = PetscMax(1,ncoarse); 7307 if (!pcbddc->coarse_subassembling) { 7308 if (pcbddc->coarsening_ratio > 1) { 7309 if (multilevel_requested) { 7310 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7311 } else { 7312 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7313 } 7314 } else { 7315 PetscMPIInt size,rank; 7316 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 7317 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7318 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 7319 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7320 } 7321 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7322 PetscInt psum; 7323 PetscMPIInt size; 7324 if (pcbddc->coarse_ksp) psum = 1; 7325 else psum = 0; 7326 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7327 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 7328 if (ncoarse < size) have_void = PETSC_TRUE; 7329 } 7330 /* determine if we can go multilevel */ 7331 if (multilevel_requested) { 7332 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7333 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7334 } 7335 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7336 7337 /* dump subassembling pattern */ 7338 if (pcbddc->dbg_flag && multilevel_allowed) { 7339 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7340 } 7341 7342 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7343 nedcfield = -1; 7344 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7345 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7346 const PetscInt *idxs; 7347 ISLocalToGlobalMapping tmap; 7348 7349 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7350 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7351 /* allocate space for temporary storage */ 7352 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7353 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7354 /* allocate for IS array */ 7355 nisdofs = pcbddc->n_ISForDofsLocal; 7356 if (pcbddc->nedclocal) { 7357 if (pcbddc->nedfield > -1) { 7358 nedcfield = pcbddc->nedfield; 7359 } else { 7360 nedcfield = 0; 7361 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7362 nisdofs = 1; 7363 } 7364 } 7365 nisneu = !!pcbddc->NeumannBoundariesLocal; 7366 nisvert = 0; /* nisvert is not used */ 7367 nis = nisdofs + nisneu + nisvert; 7368 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7369 /* dofs splitting */ 7370 for (i=0;i<nisdofs;i++) { 7371 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7372 if (nedcfield != i) { 7373 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7374 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7375 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7376 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7377 } else { 7378 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7379 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7380 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7381 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7382 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7383 } 7384 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7385 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7386 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7387 } 7388 /* neumann boundaries */ 7389 if (pcbddc->NeumannBoundariesLocal) { 7390 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7391 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7392 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7393 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7394 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7395 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7396 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7397 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7398 } 7399 /* free memory */ 7400 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7401 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7402 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7403 } else { 7404 nis = 0; 7405 nisdofs = 0; 7406 nisneu = 0; 7407 nisvert = 0; 7408 isarray = NULL; 7409 } 7410 /* destroy no longer needed map */ 7411 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7412 7413 /* subassemble */ 7414 if (multilevel_allowed) { 7415 Vec vp[1]; 7416 PetscInt nvecs = 0; 7417 PetscBool reuse,reuser; 7418 7419 if (coarse_mat) reuse = PETSC_TRUE; 7420 else reuse = PETSC_FALSE; 7421 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7422 vp[0] = NULL; 7423 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7424 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7425 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7426 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7427 nvecs = 1; 7428 7429 if (pcbddc->divudotp) { 7430 Mat B,loc_divudotp; 7431 Vec v,p; 7432 IS dummy; 7433 PetscInt np; 7434 7435 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7436 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7437 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7438 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7439 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7440 ierr = VecSet(p,1.);CHKERRQ(ierr); 7441 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7442 ierr = VecDestroy(&p);CHKERRQ(ierr); 7443 ierr = MatDestroy(&B);CHKERRQ(ierr); 7444 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7445 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7446 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7447 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7448 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7449 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7450 ierr = VecDestroy(&v);CHKERRQ(ierr); 7451 } 7452 } 7453 if (reuser) { 7454 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7455 } else { 7456 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7457 } 7458 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7459 PetscScalar *arraym,*arrayv; 7460 PetscInt nl; 7461 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7462 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7463 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7464 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7465 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7466 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7467 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7468 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7469 } else { 7470 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7471 } 7472 } else { 7473 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7474 } 7475 if (coarse_mat_is || coarse_mat) { 7476 PetscMPIInt size; 7477 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7478 if (!multilevel_allowed) { 7479 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7480 } else { 7481 Mat A; 7482 7483 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7484 if (coarse_mat_is) { 7485 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7486 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7487 coarse_mat = coarse_mat_is; 7488 } 7489 /* be sure we don't have MatSeqDENSE as local mat */ 7490 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7491 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7492 } 7493 } 7494 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7495 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7496 7497 /* create local to global scatters for coarse problem */ 7498 if (compute_vecs) { 7499 PetscInt lrows; 7500 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7501 if (coarse_mat) { 7502 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7503 } else { 7504 lrows = 0; 7505 } 7506 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7507 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7508 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7509 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7510 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7511 } 7512 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7513 7514 /* set defaults for coarse KSP and PC */ 7515 if (multilevel_allowed) { 7516 coarse_ksp_type = KSPRICHARDSON; 7517 coarse_pc_type = PCBDDC; 7518 } else { 7519 coarse_ksp_type = KSPPREONLY; 7520 coarse_pc_type = PCREDUNDANT; 7521 } 7522 7523 /* print some info if requested */ 7524 if (pcbddc->dbg_flag) { 7525 if (!multilevel_allowed) { 7526 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7527 if (multilevel_requested) { 7528 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7529 } else if (pcbddc->max_levels) { 7530 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7531 } 7532 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7533 } 7534 } 7535 7536 /* communicate coarse discrete gradient */ 7537 coarseG = NULL; 7538 if (pcbddc->nedcG && multilevel_allowed) { 7539 MPI_Comm ccomm; 7540 if (coarse_mat) { 7541 ccomm = PetscObjectComm((PetscObject)coarse_mat); 7542 } else { 7543 ccomm = MPI_COMM_NULL; 7544 } 7545 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 7546 } 7547 7548 /* create the coarse KSP object only once with defaults */ 7549 if (coarse_mat) { 7550 PetscViewer dbg_viewer = NULL; 7551 if (pcbddc->dbg_flag) { 7552 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7553 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7554 } 7555 if (!pcbddc->coarse_ksp) { 7556 char prefix[256],str_level[16]; 7557 size_t len; 7558 7559 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7560 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7561 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7562 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7563 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7564 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7565 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7566 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7567 /* TODO is this logic correct? should check for coarse_mat type */ 7568 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7569 /* prefix */ 7570 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7571 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7572 if (!pcbddc->current_level) { 7573 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7574 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7575 } else { 7576 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7577 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7578 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7579 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7580 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7581 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7582 } 7583 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7584 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7585 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7586 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7587 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7588 /* allow user customization */ 7589 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7590 } 7591 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7592 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7593 if (nisdofs) { 7594 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7595 for (i=0;i<nisdofs;i++) { 7596 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7597 } 7598 } 7599 if (nisneu) { 7600 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7601 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7602 } 7603 if (nisvert) { 7604 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7605 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7606 } 7607 if (coarseG) { 7608 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 7609 } 7610 7611 /* get some info after set from options */ 7612 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7613 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7614 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7615 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 7616 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7617 isbddc = PETSC_FALSE; 7618 } 7619 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7620 if (isredundant) { 7621 KSP inner_ksp; 7622 PC inner_pc; 7623 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7624 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7625 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7626 } 7627 7628 /* parameters which miss an API */ 7629 if (isbddc) { 7630 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7631 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7632 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7633 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7634 if (pcbddc_coarse->benign_saddle_point) { 7635 Mat coarsedivudotp_is; 7636 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7637 IS row,col; 7638 const PetscInt *gidxs; 7639 PetscInt n,st,M,N; 7640 7641 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7642 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7643 st = st-n; 7644 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7645 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7646 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7647 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7648 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7649 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7650 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7651 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7652 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7653 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7654 ierr = ISDestroy(&row);CHKERRQ(ierr); 7655 ierr = ISDestroy(&col);CHKERRQ(ierr); 7656 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7657 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7658 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7659 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7660 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7661 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7662 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7663 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7664 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7665 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7666 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7667 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7668 } 7669 } 7670 7671 /* propagate symmetry info of coarse matrix */ 7672 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7673 if (pc->pmat->symmetric_set) { 7674 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7675 } 7676 if (pc->pmat->hermitian_set) { 7677 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7678 } 7679 if (pc->pmat->spd_set) { 7680 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7681 } 7682 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7683 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7684 } 7685 /* set operators */ 7686 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7687 if (pcbddc->dbg_flag) { 7688 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7689 } 7690 } 7691 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 7692 ierr = PetscFree(isarray);CHKERRQ(ierr); 7693 #if 0 7694 { 7695 PetscViewer viewer; 7696 char filename[256]; 7697 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7698 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7699 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7700 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7701 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7702 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7703 } 7704 #endif 7705 7706 if (pcbddc->coarse_ksp) { 7707 Vec crhs,csol; 7708 7709 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7710 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7711 if (!csol) { 7712 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7713 } 7714 if (!crhs) { 7715 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7716 } 7717 } 7718 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7719 7720 /* compute null space for coarse solver if the benign trick has been requested */ 7721 if (pcbddc->benign_null) { 7722 7723 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7724 for (i=0;i<pcbddc->benign_n;i++) { 7725 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7726 } 7727 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7728 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7729 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7730 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7731 if (coarse_mat) { 7732 Vec nullv; 7733 PetscScalar *array,*array2; 7734 PetscInt nl; 7735 7736 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7737 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7738 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7739 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7740 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7741 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7742 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7743 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7744 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7745 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7746 } 7747 } 7748 7749 if (pcbddc->coarse_ksp) { 7750 PetscBool ispreonly; 7751 7752 if (CoarseNullSpace) { 7753 PetscBool isnull; 7754 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7755 if (isnull) { 7756 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7757 } 7758 /* TODO: add local nullspaces (if any) */ 7759 } 7760 /* setup coarse ksp */ 7761 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7762 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7763 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7764 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7765 KSP check_ksp; 7766 KSPType check_ksp_type; 7767 PC check_pc; 7768 Vec check_vec,coarse_vec; 7769 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7770 PetscInt its; 7771 PetscBool compute_eigs; 7772 PetscReal *eigs_r,*eigs_c; 7773 PetscInt neigs; 7774 const char *prefix; 7775 7776 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7777 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7778 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7779 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7780 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7781 /* prevent from setup unneeded object */ 7782 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7783 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7784 if (ispreonly) { 7785 check_ksp_type = KSPPREONLY; 7786 compute_eigs = PETSC_FALSE; 7787 } else { 7788 check_ksp_type = KSPGMRES; 7789 compute_eigs = PETSC_TRUE; 7790 } 7791 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7792 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7793 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7794 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7795 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7796 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7797 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7798 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7799 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7800 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7801 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7802 /* create random vec */ 7803 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7804 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7805 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7806 /* solve coarse problem */ 7807 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7808 /* set eigenvalue estimation if preonly has not been requested */ 7809 if (compute_eigs) { 7810 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7811 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7812 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7813 if (neigs) { 7814 lambda_max = eigs_r[neigs-1]; 7815 lambda_min = eigs_r[0]; 7816 if (pcbddc->use_coarse_estimates) { 7817 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7818 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7819 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7820 } 7821 } 7822 } 7823 } 7824 7825 /* check coarse problem residual error */ 7826 if (pcbddc->dbg_flag) { 7827 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7828 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7829 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7830 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7831 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7832 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7833 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7834 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7835 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7836 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7837 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7838 if (CoarseNullSpace) { 7839 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7840 } 7841 if (compute_eigs) { 7842 PetscReal lambda_max_s,lambda_min_s; 7843 KSPConvergedReason reason; 7844 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7845 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7846 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 7847 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7848 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7849 for (i=0;i<neigs;i++) { 7850 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7851 } 7852 } 7853 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7854 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7855 } 7856 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7857 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7858 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7859 if (compute_eigs) { 7860 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7861 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7862 } 7863 } 7864 } 7865 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7866 /* print additional info */ 7867 if (pcbddc->dbg_flag) { 7868 /* waits until all processes reaches this point */ 7869 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7870 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7871 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7872 } 7873 7874 /* free memory */ 7875 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7876 PetscFunctionReturn(0); 7877 } 7878 7879 #undef __FUNCT__ 7880 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 7881 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7882 { 7883 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7884 PC_IS* pcis = (PC_IS*)pc->data; 7885 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7886 IS subset,subset_mult,subset_n; 7887 PetscInt local_size,coarse_size=0; 7888 PetscInt *local_primal_indices=NULL; 7889 const PetscInt *t_local_primal_indices; 7890 PetscErrorCode ierr; 7891 7892 PetscFunctionBegin; 7893 /* Compute global number of coarse dofs */ 7894 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7895 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7896 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7897 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7898 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7899 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7900 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7901 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7902 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7903 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7904 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7905 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7906 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7907 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7908 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7909 7910 /* check numbering */ 7911 if (pcbddc->dbg_flag) { 7912 PetscScalar coarsesum,*array,*array2; 7913 PetscInt i; 7914 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7915 7916 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7917 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7918 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7919 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7920 /* counter */ 7921 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7922 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7923 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7924 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7925 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7926 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7927 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7928 for (i=0;i<pcbddc->local_primal_size;i++) { 7929 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7930 } 7931 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7932 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7933 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7934 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7935 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7936 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7937 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7938 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7939 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7940 for (i=0;i<pcis->n;i++) { 7941 if (array[i] != 0.0 && array[i] != array2[i]) { 7942 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 7943 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 7944 set_error = PETSC_TRUE; 7945 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 7946 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 7947 } 7948 } 7949 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7950 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7951 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7952 for (i=0;i<pcis->n;i++) { 7953 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 7954 } 7955 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7956 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7957 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7958 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7959 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 7960 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 7961 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 7962 PetscInt *gidxs; 7963 7964 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 7965 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 7966 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 7967 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7968 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 7969 for (i=0;i<pcbddc->local_primal_size;i++) { 7970 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 7971 } 7972 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7973 ierr = PetscFree(gidxs);CHKERRQ(ierr); 7974 } 7975 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7976 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7977 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 7978 } 7979 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 7980 /* get back data */ 7981 *coarse_size_n = coarse_size; 7982 *local_primal_indices_n = local_primal_indices; 7983 PetscFunctionReturn(0); 7984 } 7985 7986 #undef __FUNCT__ 7987 #define __FUNCT__ "PCBDDCGlobalToLocal" 7988 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 7989 { 7990 IS localis_t; 7991 PetscInt i,lsize,*idxs,n; 7992 PetscScalar *vals; 7993 PetscErrorCode ierr; 7994 7995 PetscFunctionBegin; 7996 /* get indices in local ordering exploiting local to global map */ 7997 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 7998 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 7999 for (i=0;i<lsize;i++) vals[i] = 1.0; 8000 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8001 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8002 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8003 if (idxs) { /* multilevel guard */ 8004 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8005 } 8006 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8007 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8008 ierr = PetscFree(vals);CHKERRQ(ierr); 8009 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8010 /* now compute set in local ordering */ 8011 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8012 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8013 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8014 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8015 for (i=0,lsize=0;i<n;i++) { 8016 if (PetscRealPart(vals[i]) > 0.5) { 8017 lsize++; 8018 } 8019 } 8020 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8021 for (i=0,lsize=0;i<n;i++) { 8022 if (PetscRealPart(vals[i]) > 0.5) { 8023 idxs[lsize++] = i; 8024 } 8025 } 8026 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8027 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8028 *localis = localis_t; 8029 PetscFunctionReturn(0); 8030 } 8031 8032 #undef __FUNCT__ 8033 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 8034 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8035 { 8036 PC_IS *pcis=(PC_IS*)pc->data; 8037 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8038 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8039 Mat S_j; 8040 PetscInt *used_xadj,*used_adjncy; 8041 PetscBool free_used_adj; 8042 PetscErrorCode ierr; 8043 8044 PetscFunctionBegin; 8045 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8046 free_used_adj = PETSC_FALSE; 8047 if (pcbddc->sub_schurs_layers == -1) { 8048 used_xadj = NULL; 8049 used_adjncy = NULL; 8050 } else { 8051 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8052 used_xadj = pcbddc->mat_graph->xadj; 8053 used_adjncy = pcbddc->mat_graph->adjncy; 8054 } else if (pcbddc->computed_rowadj) { 8055 used_xadj = pcbddc->mat_graph->xadj; 8056 used_adjncy = pcbddc->mat_graph->adjncy; 8057 } else { 8058 PetscBool flg_row=PETSC_FALSE; 8059 const PetscInt *xadj,*adjncy; 8060 PetscInt nvtxs; 8061 8062 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8063 if (flg_row) { 8064 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8065 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8066 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8067 free_used_adj = PETSC_TRUE; 8068 } else { 8069 pcbddc->sub_schurs_layers = -1; 8070 used_xadj = NULL; 8071 used_adjncy = NULL; 8072 } 8073 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8074 } 8075 } 8076 8077 /* setup sub_schurs data */ 8078 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8079 if (!sub_schurs->schur_explicit) { 8080 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8081 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8082 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8083 } else { 8084 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8085 PetscBool isseqaij,need_change = PETSC_FALSE; 8086 PetscInt benign_n; 8087 Mat change = NULL; 8088 Vec scaling = NULL; 8089 IS change_primal = NULL; 8090 8091 if (!pcbddc->use_vertices && reuse_solvers) { 8092 PetscInt n_vertices; 8093 8094 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8095 reuse_solvers = (PetscBool)!n_vertices; 8096 } 8097 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8098 if (!isseqaij) { 8099 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8100 if (matis->A == pcbddc->local_mat) { 8101 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8102 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8103 } else { 8104 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8105 } 8106 } 8107 if (!pcbddc->benign_change_explicit) { 8108 benign_n = pcbddc->benign_n; 8109 } else { 8110 benign_n = 0; 8111 } 8112 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8113 We need a global reduction to avoid possible deadlocks. 8114 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8115 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8116 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8117 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8118 need_change = (PetscBool)(!need_change); 8119 } 8120 /* If the user defines additional constraints, we import them here. 8121 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8122 if (need_change) { 8123 PC_IS *pcisf; 8124 PC_BDDC *pcbddcf; 8125 PC pcf; 8126 8127 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8128 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8129 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8130 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8131 /* hacks */ 8132 pcisf = (PC_IS*)pcf->data; 8133 pcisf->is_B_local = pcis->is_B_local; 8134 pcisf->vec1_N = pcis->vec1_N; 8135 pcisf->BtoNmap = pcis->BtoNmap; 8136 pcisf->n = pcis->n; 8137 pcisf->n_B = pcis->n_B; 8138 pcbddcf = (PC_BDDC*)pcf->data; 8139 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8140 pcbddcf->mat_graph = pcbddc->mat_graph; 8141 pcbddcf->use_faces = PETSC_TRUE; 8142 pcbddcf->use_change_of_basis = PETSC_TRUE; 8143 pcbddcf->use_change_on_faces = PETSC_TRUE; 8144 pcbddcf->use_qr_single = PETSC_TRUE; 8145 pcbddcf->fake_change = PETSC_TRUE; 8146 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8147 /* store information on primal vertices and change of basis (in local numbering) */ 8148 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8149 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8150 change = pcbddcf->ConstraintMatrix; 8151 pcbddcf->ConstraintMatrix = NULL; 8152 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8153 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8154 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8155 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8156 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8157 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8158 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8159 pcf->ops->destroy = NULL; 8160 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8161 } 8162 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8163 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8164 ierr = MatDestroy(&change);CHKERRQ(ierr); 8165 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8166 } 8167 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8168 8169 /* free adjacency */ 8170 if (free_used_adj) { 8171 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8172 } 8173 PetscFunctionReturn(0); 8174 } 8175 8176 #undef __FUNCT__ 8177 #define __FUNCT__ "PCBDDCInitSubSchurs" 8178 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8179 { 8180 PC_IS *pcis=(PC_IS*)pc->data; 8181 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8182 PCBDDCGraph graph; 8183 PetscErrorCode ierr; 8184 8185 PetscFunctionBegin; 8186 /* attach interface graph for determining subsets */ 8187 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8188 IS verticesIS,verticescomm; 8189 PetscInt vsize,*idxs; 8190 8191 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8192 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8193 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8194 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8195 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8196 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8197 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8198 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8199 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8200 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8201 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8202 } else { 8203 graph = pcbddc->mat_graph; 8204 } 8205 /* print some info */ 8206 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8207 IS vertices; 8208 PetscInt nv,nedges,nfaces; 8209 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8210 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8211 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8212 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8213 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8214 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8215 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8216 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8217 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8218 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8219 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8220 } 8221 8222 /* sub_schurs init */ 8223 if (!pcbddc->sub_schurs) { 8224 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8225 } 8226 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8227 8228 /* free graph struct */ 8229 if (pcbddc->sub_schurs_rebuild) { 8230 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8231 } 8232 PetscFunctionReturn(0); 8233 } 8234 8235 #undef __FUNCT__ 8236 #define __FUNCT__ "PCBDDCCheckOperator" 8237 PetscErrorCode PCBDDCCheckOperator(PC pc) 8238 { 8239 PC_IS *pcis=(PC_IS*)pc->data; 8240 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8241 PetscErrorCode ierr; 8242 8243 PetscFunctionBegin; 8244 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8245 IS zerodiag = NULL; 8246 Mat S_j,B0_B=NULL; 8247 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8248 PetscScalar *p0_check,*array,*array2; 8249 PetscReal norm; 8250 PetscInt i; 8251 8252 /* B0 and B0_B */ 8253 if (zerodiag) { 8254 IS dummy; 8255 8256 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8257 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8258 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8259 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8260 } 8261 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8262 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8263 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8264 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8265 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8266 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8267 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8268 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8269 /* S_j */ 8270 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8271 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8272 8273 /* mimic vector in \widetilde{W}_\Gamma */ 8274 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8275 /* continuous in primal space */ 8276 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8277 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8278 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8279 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8280 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8281 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8282 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8283 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8284 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8285 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8286 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8287 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8288 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8289 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8290 8291 /* assemble rhs for coarse problem */ 8292 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8293 /* local with Schur */ 8294 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8295 if (zerodiag) { 8296 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8297 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8298 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8299 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8300 } 8301 /* sum on primal nodes the local contributions */ 8302 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8303 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8304 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8305 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8306 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8307 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8308 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8309 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8310 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8311 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8312 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8313 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8314 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8315 /* scale primal nodes (BDDC sums contibutions) */ 8316 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8317 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8318 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8319 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8320 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8321 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8322 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8323 /* global: \widetilde{B0}_B w_\Gamma */ 8324 if (zerodiag) { 8325 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8326 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8327 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8328 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8329 } 8330 /* BDDC */ 8331 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8332 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8333 8334 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8335 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8336 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8337 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8338 for (i=0;i<pcbddc->benign_n;i++) { 8339 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8340 } 8341 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8342 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8343 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8344 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8345 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8346 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8347 } 8348 PetscFunctionReturn(0); 8349 } 8350 8351 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8352 #undef __FUNCT__ 8353 #define __FUNCT__ "MatMPIAIJRestrict" 8354 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8355 { 8356 Mat At; 8357 IS rows; 8358 PetscInt rst,ren; 8359 PetscErrorCode ierr; 8360 PetscLayout rmap; 8361 8362 PetscFunctionBegin; 8363 rst = ren = 0; 8364 if (ccomm != MPI_COMM_NULL) { 8365 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8366 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8367 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8368 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8369 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8370 } 8371 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8372 ierr = MatGetSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8373 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8374 8375 if (ccomm != MPI_COMM_NULL) { 8376 Mat_MPIAIJ *a,*b; 8377 IS from,to; 8378 Vec gvec; 8379 PetscInt lsize; 8380 8381 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8382 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8383 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8384 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8385 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8386 a = (Mat_MPIAIJ*)At->data; 8387 b = (Mat_MPIAIJ*)(*B)->data; 8388 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8389 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8390 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8391 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8392 b->A = a->A; 8393 b->B = a->B; 8394 8395 b->donotstash = a->donotstash; 8396 b->roworiented = a->roworiented; 8397 b->rowindices = 0; 8398 b->rowvalues = 0; 8399 b->getrowactive = PETSC_FALSE; 8400 8401 (*B)->rmap = rmap; 8402 (*B)->factortype = A->factortype; 8403 (*B)->assembled = PETSC_TRUE; 8404 (*B)->insertmode = NOT_SET_VALUES; 8405 (*B)->preallocated = PETSC_TRUE; 8406 8407 if (a->colmap) { 8408 #if defined(PETSC_USE_CTABLE) 8409 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8410 #else 8411 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8412 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8413 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8414 #endif 8415 } else b->colmap = 0; 8416 if (a->garray) { 8417 PetscInt len; 8418 len = a->B->cmap->n; 8419 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8420 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8421 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8422 } else b->garray = 0; 8423 8424 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8425 b->lvec = a->lvec; 8426 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8427 8428 /* cannot use VecScatterCopy */ 8429 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8430 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8431 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8432 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8433 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8434 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8435 ierr = ISDestroy(&from);CHKERRQ(ierr); 8436 ierr = ISDestroy(&to);CHKERRQ(ierr); 8437 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8438 } 8439 ierr = MatDestroy(&At);CHKERRQ(ierr); 8440 PetscFunctionReturn(0); 8441 } 8442