1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 8 9 /* if range is true, it returns B s.t. span{B} = range(A) 10 if range is false, it returns B s.t. range(B) _|_ range(A) */ 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatDenseOrthogonalRangeOrComplement" 13 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 14 { 15 #if !defined(PETSC_USE_COMPLEX) 16 PetscScalar *uwork,*data,*U, ds = 0.; 17 PetscReal *sing; 18 PetscBLASInt bM,bN,lwork,lierr,di = 1; 19 PetscInt ulw,i,nr,nc,n; 20 PetscErrorCode ierr; 21 22 PetscFunctionBegin; 23 #if defined(PETSC_MISSING_LAPACK_GESVD) 24 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 25 #endif 26 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 27 if (!nr || !nc) PetscFunctionReturn(0); 28 29 /* workspace */ 30 if (!work) { 31 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 32 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 33 } else { 34 ulw = lw; 35 uwork = work; 36 } 37 n = PetscMin(nr,nc); 38 if (!rwork) { 39 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 40 } else { 41 sing = rwork; 42 } 43 44 /* SVD */ 45 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 46 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 47 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 49 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 50 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 51 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 52 ierr = PetscFPTrapPop();CHKERRQ(ierr); 53 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 54 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 55 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 56 if (!rwork) { 57 ierr = PetscFree(sing);CHKERRQ(ierr); 58 } 59 if (!work) { 60 ierr = PetscFree(uwork);CHKERRQ(ierr); 61 } 62 /* create B */ 63 if (!range) { 64 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 65 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 66 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 67 } else { 68 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 69 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 70 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 71 } 72 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 73 ierr = PetscFree(U);CHKERRQ(ierr); 74 #else 75 PetscFunctionBegin; 76 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 77 #endif 78 PetscFunctionReturn(0); 79 } 80 81 /* TODO REMOVE */ 82 #if defined(PRINT_GDET) 83 static int inc = 0; 84 static int lev = 0; 85 #endif 86 87 #undef __FUNCT__ 88 #define __FUNCT__ "PCBDDCComputeNedelecChangeEdge" 89 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 90 { 91 PetscErrorCode ierr; 92 Mat GE,GEd; 93 PetscInt rsize,csize,esize; 94 PetscScalar *ptr; 95 96 PetscFunctionBegin; 97 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 98 if (!esize) PetscFunctionReturn(0); 99 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 100 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 101 102 /* gradients */ 103 ptr = work + 5*esize; 104 ierr = MatGetSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 105 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 106 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 107 ierr = MatDestroy(&GE);CHKERRQ(ierr); 108 109 /* constants */ 110 ptr += rsize*csize; 111 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 112 ierr = MatGetSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 113 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 114 ierr = MatDestroy(&GE);CHKERRQ(ierr); 115 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 116 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 117 118 if (corners) { 119 Mat GEc; 120 PetscScalar *vals,v; 121 122 ierr = MatGetSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 123 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 124 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 125 /* v = PetscAbsScalar(vals[0]) */; 126 v = 1.; 127 cvals[0] = vals[0]/v; 128 cvals[1] = vals[1]/v; 129 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 130 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 131 #if defined(PRINT_GDET) 132 { 133 PetscViewer viewer; 134 char filename[256]; 135 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 136 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 137 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 138 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 139 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 140 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 141 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 142 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 143 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 144 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 145 } 146 #endif 147 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 148 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 149 } 150 151 PetscFunctionReturn(0); 152 } 153 154 #undef __FUNCT__ 155 #define __FUNCT__ "PCBDDCNedelecSupport" 156 PetscErrorCode PCBDDCNedelecSupport(PC pc) 157 { 158 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 159 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 160 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 161 Vec tvec; 162 PetscSF sfv; 163 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 164 MPI_Comm comm; 165 IS lned,primals,allprimals,nedfieldlocal; 166 IS *eedges,*extrows,*extcols,*alleedges; 167 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 168 PetscScalar *vals,*work; 169 PetscReal *rwork; 170 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 171 PetscInt ne,nv,Lv,order,n,field; 172 PetscInt n_neigh,*neigh,*n_shared,**shared; 173 PetscInt i,j,extmem,cum,maxsize,nee; 174 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 175 PetscInt *sfvleaves,*sfvroots; 176 PetscInt *corners,*cedges; 177 PetscInt *ecount,**eneighs,*vcount,**vneighs; 178 #if defined(PETSC_USE_DEBUG) 179 PetscInt *emarks; 180 #endif 181 PetscBool print,eerr,done,lrc[2],conforming,global; 182 PetscErrorCode ierr; 183 184 PetscFunctionBegin; 185 /* test variable order code and print debug info TODO: to be removed */ 186 print = PETSC_FALSE; 187 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_print_nedelec",&print,NULL);CHKERRQ(ierr); 188 ierr = PetscOptionsGetInt(NULL,NULL,"-pc_bddc_nedelec_order",&pcbddc->nedorder,NULL);CHKERRQ(ierr); 189 190 /* Return to caller if there are no edges in the decomposition */ 191 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 192 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 193 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 194 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 195 lrc[0] = PETSC_FALSE; 196 for (i=0;i<n;i++) { 197 if (PetscRealPart(vals[i]) > 2.) { 198 lrc[0] = PETSC_TRUE; 199 break; 200 } 201 } 202 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 203 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 204 if (!lrc[1]) PetscFunctionReturn(0); 205 206 /* If the discrete gradient is defined for a subset of dofs and global is true, 207 it assumes G is given in global ordering for all the dofs. 208 Otherwise, the ordering is global for the Nedelec field */ 209 order = pcbddc->nedorder; 210 conforming = pcbddc->conforming; 211 field = pcbddc->nedfield; 212 global = pcbddc->nedglobal; 213 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 214 if (pcbddc->n_ISForDofsLocal && field > -1) { 215 PetscBool setprimal = PETSC_FALSE; 216 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_nedelec_field_primal",&setprimal,NULL);CHKERRQ(ierr); 217 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 218 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 219 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 220 if (setprimal) { 221 IS enedfieldlocal; 222 PetscInt *eidxs; 223 224 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 225 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 226 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 227 for (i=0,cum=0;i<ne;i++) { 228 if (PetscRealPart(vals[idxs[i]]) > 2.) { 229 eidxs[cum++] = idxs[i]; 230 } 231 } 232 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 233 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 234 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 235 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 236 ierr = PetscFree(eidxs);CHKERRQ(ierr); 237 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 238 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 239 PetscFunctionReturn(0); 240 } 241 } else if (!pcbddc->n_ISForDofsLocal) { 242 PetscBool testnedfield = PETSC_FALSE; 243 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_nedelec_field",&testnedfield,NULL);CHKERRQ(ierr); 244 if (!testnedfield) { 245 ne = n; 246 nedfieldlocal = NULL; 247 } else { 248 /* ierr = ISCreateStride(comm,n,0,1,&nedfieldlocal);CHKERRQ(ierr); */ 249 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 250 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 251 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 252 for (i=0;i<n;i++) matis->sf_leafdata[i] = 1; 253 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 254 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 255 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 256 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 257 for (i=0,cum=0;i<n;i++) { 258 if (matis->sf_leafdata[i] > 1) { 259 matis->sf_leafdata[cum++] = i; 260 } 261 } 262 ierr = ISCreateGeneral(comm,cum,matis->sf_leafdata,PETSC_COPY_VALUES,&nedfieldlocal);CHKERRQ(ierr); 263 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 264 } 265 global = PETSC_TRUE; 266 } else { 267 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 268 } 269 270 if (nedfieldlocal) { /* merge with previous code when testing is done */ 271 IS is; 272 273 /* need to map from the local Nedelec field to local numbering */ 274 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 275 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 276 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 277 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 278 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 279 if (global) { 280 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 281 el2g = al2g; 282 } else { 283 IS gis; 284 285 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 286 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 287 ierr = ISDestroy(&gis);CHKERRQ(ierr); 288 } 289 ierr = ISDestroy(&is);CHKERRQ(ierr); 290 } else { 291 /* restore default */ 292 pcbddc->nedfield = -1; 293 /* one ref for the destruction of al2g, one for el2g */ 294 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 295 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 296 el2g = al2g; 297 fl2g = NULL; 298 } 299 300 /* Sanity checks */ 301 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 302 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 303 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 304 305 /* Drop connections for interior edges */ 306 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 307 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 308 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 309 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 310 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 311 if (nedfieldlocal) { 312 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 313 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 314 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 315 } else { 316 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 317 } 318 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 319 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 320 if (global) { 321 PetscInt rst; 322 323 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 324 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 325 if (matis->sf_rootdata[i] < 2) { 326 matis->sf_rootdata[cum++] = i + rst; 327 } 328 } 329 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 330 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 331 } else { 332 PetscInt *tbz; 333 334 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 335 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 336 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 337 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 338 for (i=0,cum=0;i<ne;i++) 339 if (matis->sf_leafdata[idxs[i]] == 1) 340 tbz[cum++] = i; 341 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 342 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 343 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 344 ierr = PetscFree(tbz);CHKERRQ(ierr); 345 } 346 347 /* Extract subdomain relevant rows of G */ 348 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 349 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 350 ierr = MatGetSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 351 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 352 ierr = ISDestroy(&lned);CHKERRQ(ierr); 353 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 354 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 355 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 356 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 357 if (print) { 358 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 359 ierr = MatView(lG,NULL);CHKERRQ(ierr); 360 } 361 362 /* SF for nodal communications */ 363 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 364 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 365 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 366 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 367 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 368 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 369 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 370 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 371 ierr = PetscMalloc2(nv,&sfvleaves,Lv,&sfvroots);CHKERRQ(ierr); 372 373 /* Destroy temporary G created in MATIS format and modified G */ 374 ierr = MatDestroy(&G);CHKERRQ(ierr); 375 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 376 377 /* Save lG */ 378 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 379 380 /* Analyze the edge-nodes connections (duplicate lG) */ 381 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 382 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 383 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 384 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 385 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 386 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 387 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 388 /* need to import the boundary specification to ensure the 389 proper detection of coarse edges' endpoints */ 390 if (pcbddc->DirichletBoundariesLocal) { 391 IS is; 392 393 if (fl2g) { 394 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 395 } else { 396 is = pcbddc->DirichletBoundariesLocal; 397 } 398 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 399 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 400 for (i=0;i<cum;i++) { 401 if (idxs[i] >= 0) { 402 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 403 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 404 } 405 } 406 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 407 if (fl2g) { 408 ierr = ISDestroy(&is);CHKERRQ(ierr); 409 } 410 } 411 if (pcbddc->NeumannBoundariesLocal) { 412 IS is; 413 414 if (fl2g) { 415 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 416 } else { 417 is = pcbddc->NeumannBoundariesLocal; 418 } 419 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 420 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 421 for (i=0;i<cum;i++) { 422 if (idxs[i] >= 0) { 423 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 424 } 425 } 426 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 427 if (fl2g) { 428 ierr = ISDestroy(&is);CHKERRQ(ierr); 429 } 430 } 431 432 /* count neighs per dof */ 433 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 434 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 435 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 436 for (i=1,cum=0;i<n_neigh;i++) { 437 cum += n_shared[i]; 438 for (j=0;j<n_shared[i];j++) { 439 ecount[shared[i][j]]++; 440 } 441 } 442 if (ne) { 443 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 444 } 445 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 446 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 447 for (i=1;i<n_neigh;i++) { 448 for (j=0;j<n_shared[i];j++) { 449 PetscInt k = shared[i][j]; 450 eneighs[k][ecount[k]] = neigh[i]; 451 ecount[k]++; 452 } 453 } 454 for (i=0;i<ne;i++) { 455 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 456 } 457 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 458 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 459 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 460 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 461 for (i=1,cum=0;i<n_neigh;i++) { 462 cum += n_shared[i]; 463 for (j=0;j<n_shared[i];j++) { 464 vcount[shared[i][j]]++; 465 } 466 } 467 if (nv) { 468 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 469 } 470 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 471 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 472 for (i=1;i<n_neigh;i++) { 473 for (j=0;j<n_shared[i];j++) { 474 PetscInt k = shared[i][j]; 475 vneighs[k][vcount[k]] = neigh[i]; 476 vcount[k]++; 477 } 478 } 479 for (i=0;i<nv;i++) { 480 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 481 } 482 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 483 484 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 485 for proper detection of coarse edges' endpoints */ 486 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 487 for (i=0;i<ne;i++) { 488 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 489 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 490 } 491 } 492 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 493 if (!conforming) { 494 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 495 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 496 } 497 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 498 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 499 cum = 0; 500 for (i=0;i<ne;i++) { 501 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 502 if (!PetscBTLookup(btee,i)) { 503 marks[cum++] = i; 504 continue; 505 } 506 /* set badly connected edge dofs as primal */ 507 if (!conforming) { 508 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 509 marks[cum++] = i; 510 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 511 for (j=ii[i];j<ii[i+1];j++) { 512 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 513 } 514 } else { 515 /* every edge dofs should be connected trough a certain number of nodal dofs 516 to other edge dofs belonging to coarse edges 517 - at most 2 endpoints 518 - order-1 interior nodal dofs 519 - no undefined nodal dofs (nconn < order) 520 */ 521 PetscInt ends = 0,ints = 0, undef = 0; 522 for (j=ii[i];j<ii[i+1];j++) { 523 PetscInt v = jj[j],k; 524 PetscInt nconn = iit[v+1]-iit[v]; 525 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 526 if (nconn > order) ends++; 527 else if (nconn == order) ints++; 528 else undef++; 529 } 530 if (undef || ends > 2 || ints != order -1) { 531 marks[cum++] = i; 532 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 533 for (j=ii[i];j<ii[i+1];j++) { 534 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 535 } 536 } 537 } 538 } 539 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 540 if (!order && ii[i+1] != ii[i]) { 541 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 542 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 543 } 544 } 545 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 546 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 547 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 548 if (!conforming) { 549 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 550 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 551 } 552 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 553 554 /* identify splitpoints and corner candidates */ 555 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 556 if (print) { 557 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 558 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 559 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 560 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 561 } 562 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 563 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 564 for (i=0;i<nv;i++) { 565 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 566 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 567 if (!order) { /* variable order */ 568 PetscReal vorder = 0.; 569 570 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 571 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 572 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 573 ord = 1; 574 } 575 #if defined(PETSC_USE_DEBUG) 576 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 577 #endif 578 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 579 if (PetscBTLookup(btbd,jj[j])) { 580 bdir = PETSC_TRUE; 581 break; 582 } 583 if (vc != ecount[jj[j]]) { 584 sneighs = PETSC_FALSE; 585 } else { 586 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 587 for (k=0;k<vc;k++) { 588 if (vn[k] != en[k]) { 589 sneighs = PETSC_FALSE; 590 break; 591 } 592 } 593 } 594 } 595 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 596 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 597 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 598 } else if (test == ord) { 599 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 600 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 601 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 602 } else { 603 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 604 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 605 } 606 } 607 } 608 ierr = PetscFree(ecount);CHKERRQ(ierr); 609 ierr = PetscFree(vcount);CHKERRQ(ierr); 610 if (ne) { 611 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 612 } 613 if (nv) { 614 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 615 } 616 ierr = PetscFree(eneighs);CHKERRQ(ierr); 617 ierr = PetscFree(vneighs);CHKERRQ(ierr); 618 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 619 620 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 621 if (order != 1) { 622 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 623 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 624 for (i=0;i<nv;i++) { 625 if (PetscBTLookup(btvcand,i)) { 626 PetscBool found = PETSC_FALSE; 627 for (j=ii[i];j<ii[i+1] && !found;j++) { 628 PetscInt k,e = jj[j]; 629 if (PetscBTLookup(bte,e)) continue; 630 for (k=iit[e];k<iit[e+1];k++) { 631 PetscInt v = jjt[k]; 632 if (v != i && PetscBTLookup(btvcand,v)) { 633 found = PETSC_TRUE; 634 break; 635 } 636 } 637 } 638 if (!found) { 639 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 640 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 641 } else { 642 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 643 } 644 } 645 } 646 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 647 } 648 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 649 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 650 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 651 652 /* Get the local G^T explicitly */ 653 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 654 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 655 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 656 657 /* Mark interior nodal dofs */ 658 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 659 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 660 for (i=1;i<n_neigh;i++) { 661 for (j=0;j<n_shared[i];j++) { 662 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 663 } 664 } 665 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 666 667 /* communicate corners and splitpoints */ 668 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 669 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 670 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 671 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 672 673 if (print) { 674 IS tbz; 675 676 cum = 0; 677 for (i=0;i<nv;i++) 678 if (sfvleaves[i]) 679 vmarks[cum++] = i; 680 681 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 682 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 683 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 684 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 685 } 686 687 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 688 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 689 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 690 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 691 692 /* Zero rows of lGt corresponding to identified corners 693 and interior nodal dofs */ 694 cum = 0; 695 for (i=0;i<nv;i++) { 696 if (sfvleaves[i]) { 697 vmarks[cum++] = i; 698 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 699 } 700 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 701 } 702 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 703 if (print) { 704 IS tbz; 705 706 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 707 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 708 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 709 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 710 } 711 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 712 ierr = PetscFree(vmarks);CHKERRQ(ierr); 713 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 714 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 715 716 /* Recompute G */ 717 ierr = MatDestroy(&lG);CHKERRQ(ierr); 718 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 719 if (print) { 720 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 721 ierr = MatView(lG,NULL);CHKERRQ(ierr); 722 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 723 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 724 } 725 726 /* Get primal dofs (if any) */ 727 cum = 0; 728 for (i=0;i<ne;i++) { 729 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 730 } 731 if (fl2g) { 732 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 733 } 734 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 735 if (print) { 736 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 737 ierr = ISView(primals,NULL);CHKERRQ(ierr); 738 } 739 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 740 /* TODO: what if the user passed in some of them ? */ 741 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 742 ierr = ISDestroy(&primals);CHKERRQ(ierr); 743 744 /* Compute edge connectivity */ 745 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 746 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 747 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 748 if (fl2g) { 749 PetscBT btf; 750 PetscInt *iia,*jja,*iiu,*jju; 751 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 752 753 /* create CSR for all local dofs */ 754 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 755 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 756 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 757 iiu = pcbddc->mat_graph->xadj; 758 jju = pcbddc->mat_graph->adjncy; 759 } else if (pcbddc->use_local_adj) { 760 rest = PETSC_TRUE; 761 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 762 } else { 763 free = PETSC_TRUE; 764 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 765 iiu[0] = 0; 766 for (i=0;i<n;i++) { 767 iiu[i+1] = i+1; 768 jju[i] = -1; 769 } 770 } 771 772 /* import sizes of CSR */ 773 iia[0] = 0; 774 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 775 776 /* overwrite entries corresponding to the Nedelec field */ 777 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 778 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 779 for (i=0;i<ne;i++) { 780 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 781 iia[idxs[i]+1] = ii[i+1]-ii[i]; 782 } 783 784 /* iia in CSR */ 785 for (i=0;i<n;i++) iia[i+1] += iia[i]; 786 787 /* jja in CSR */ 788 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 789 for (i=0;i<n;i++) 790 if (!PetscBTLookup(btf,i)) 791 for (j=0;j<iiu[i+1]-iiu[i];j++) 792 jja[iia[i]+j] = jju[iiu[i]+j]; 793 794 /* map edge dofs connectivity */ 795 if (jj) { 796 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 797 for (i=0;i<ne;i++) { 798 PetscInt e = idxs[i]; 799 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 800 } 801 } 802 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 803 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 804 if (rest) { 805 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 806 } 807 if (free) { 808 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 809 } 810 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 811 } else { 812 if (jj) { 813 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 814 } 815 } 816 817 /* Analyze interface for edge dofs */ 818 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 819 820 /* Get coarse edges in the edge space */ 821 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 822 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 823 824 if (fl2g) { 825 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 826 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 827 for (i=0;i<nee;i++) { 828 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 829 } 830 } else { 831 eedges = alleedges; 832 primals = allprimals; 833 } 834 835 /* Mark fine edge dofs with their coarse edge id */ 836 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 837 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 838 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 839 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 840 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 841 if (print) { 842 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 843 ierr = ISView(primals,NULL);CHKERRQ(ierr); 844 } 845 846 maxsize = 0; 847 for (i=0;i<nee;i++) { 848 PetscInt size,mark = i+1; 849 850 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 851 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 852 for (j=0;j<size;j++) marks[idxs[j]] = mark; 853 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 854 maxsize = PetscMax(maxsize,size); 855 } 856 857 /* Find coarse edge endpoints */ 858 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 859 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 860 for (i=0;i<nee;i++) { 861 PetscInt mark = i+1,size; 862 863 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 864 if (!size && nedfieldlocal) continue; 865 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 866 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 867 if (print) { 868 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 869 ISView(eedges[i],NULL); 870 } 871 for (j=0;j<size;j++) { 872 PetscInt k, ee = idxs[j]; 873 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 874 for (k=ii[ee];k<ii[ee+1];k++) { 875 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 876 if (PetscBTLookup(btv,jj[k])) { 877 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 878 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 879 PetscInt k2; 880 PetscBool corner = PETSC_FALSE; 881 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 882 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 883 /* it's a corner if either is connected with an edge dof belonging to a different cc or 884 if the edge dof lie on the natural part of the boundary */ 885 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 886 corner = PETSC_TRUE; 887 break; 888 } 889 } 890 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 891 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 892 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 893 } else { 894 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 895 } 896 } 897 } 898 } 899 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 900 } 901 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 902 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 903 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 904 905 /* Reset marked primal dofs */ 906 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 907 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 908 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 909 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 910 911 /* Now use the initial lG */ 912 ierr = MatDestroy(&lG);CHKERRQ(ierr); 913 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 914 lG = lGinit; 915 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 916 917 /* Compute extended cols indices */ 918 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 919 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 920 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 921 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 922 i *= maxsize; 923 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 924 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 925 eerr = PETSC_FALSE; 926 for (i=0;i<nee;i++) { 927 PetscInt size,found = 0; 928 929 cum = 0; 930 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 931 if (!size && nedfieldlocal) continue; 932 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 933 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 934 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 935 for (j=0;j<size;j++) { 936 PetscInt k,ee = idxs[j]; 937 for (k=ii[ee];k<ii[ee+1];k++) { 938 PetscInt vv = jj[k]; 939 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 940 else if (!PetscBTLookupSet(btvc,vv)) found++; 941 } 942 } 943 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 944 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 945 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 946 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 947 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 948 /* it may happen that endpoints are not defined at this point 949 if it is the case, mark this edge for a second pass */ 950 if (cum != size -1 || found != 2) { 951 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 952 if (print) { 953 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 954 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 955 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 956 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 957 } 958 eerr = PETSC_TRUE; 959 } 960 } 961 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 962 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 963 if (done) { 964 PetscInt *newprimals; 965 966 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 967 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 968 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 969 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 970 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 971 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 972 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 973 for (i=0;i<nee;i++) { 974 PetscBool has_candidates = PETSC_FALSE; 975 if (PetscBTLookup(bter,i)) { 976 PetscInt size,mark = i+1; 977 978 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 979 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 980 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 981 for (j=0;j<size;j++) { 982 PetscInt k,ee = idxs[j]; 983 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 984 for (k=ii[ee];k<ii[ee+1];k++) { 985 /* set all candidates located on the edge as corners */ 986 if (PetscBTLookup(btvcand,jj[k])) { 987 PetscInt k2,vv = jj[k]; 988 has_candidates = PETSC_TRUE; 989 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 990 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 991 /* set all edge dofs connected to candidate as primals */ 992 for (k2=iit[vv];k2<iit[vv+1];k2++) { 993 if (marks[jjt[k2]] == mark) { 994 PetscInt k3,ee2 = jjt[k2]; 995 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 996 newprimals[cum++] = ee2; 997 /* finally set the new corners */ 998 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 999 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1000 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1001 } 1002 } 1003 } 1004 } else { 1005 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1006 } 1007 } 1008 } 1009 if (!has_candidates) { /* circular edge */ 1010 PetscInt k, ee = idxs[0],*tmarks; 1011 1012 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1013 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1014 for (k=ii[ee];k<ii[ee+1];k++) { 1015 PetscInt k2; 1016 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1017 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1018 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1019 } 1020 for (j=0;j<size;j++) { 1021 if (tmarks[idxs[j]] > 1) { 1022 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1023 newprimals[cum++] = idxs[j]; 1024 } 1025 } 1026 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1027 } 1028 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1029 } 1030 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1031 } 1032 ierr = PetscFree(extcols);CHKERRQ(ierr); 1033 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1034 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1035 if (fl2g) { 1036 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1037 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1038 for (i=0;i<nee;i++) { 1039 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1040 } 1041 ierr = PetscFree(eedges);CHKERRQ(ierr); 1042 } 1043 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1044 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1045 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1046 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1047 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1048 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1049 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1050 if (fl2g) { 1051 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1052 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1053 for (i=0;i<nee;i++) { 1054 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1055 } 1056 } else { 1057 eedges = alleedges; 1058 primals = allprimals; 1059 } 1060 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1061 1062 /* Mark again */ 1063 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1064 for (i=0;i<nee;i++) { 1065 PetscInt size,mark = i+1; 1066 1067 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1068 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1069 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1070 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1071 } 1072 if (print) { 1073 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1074 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1075 } 1076 1077 /* Recompute extended cols */ 1078 eerr = PETSC_FALSE; 1079 for (i=0;i<nee;i++) { 1080 PetscInt size; 1081 1082 cum = 0; 1083 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1084 if (!size && nedfieldlocal) continue; 1085 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1086 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1087 for (j=0;j<size;j++) { 1088 PetscInt k,ee = idxs[j]; 1089 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1090 } 1091 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1092 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1093 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1094 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1095 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1096 if (cum != size -1) { 1097 if (print) { 1098 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1099 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1100 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1101 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1102 } 1103 eerr = PETSC_TRUE; 1104 } 1105 } 1106 } 1107 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1108 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1109 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1110 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1111 /* an error should not occur at this point */ 1112 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1113 1114 /* Check the number of endpoints */ 1115 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1116 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1117 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1118 for (i=0;i<nee;i++) { 1119 PetscInt size, found = 0, gc[2]; 1120 1121 /* init with defaults */ 1122 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1123 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1124 if (!size && nedfieldlocal) continue; 1125 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1126 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1127 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1128 for (j=0;j<size;j++) { 1129 PetscInt k,ee = idxs[j]; 1130 for (k=ii[ee];k<ii[ee+1];k++) { 1131 PetscInt vv = jj[k]; 1132 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1133 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1134 corners[i*2+found++] = vv; 1135 } 1136 } 1137 } 1138 if (found != 2) { 1139 PetscInt e; 1140 if (fl2g) { 1141 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1142 } else { 1143 e = idxs[0]; 1144 } 1145 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1146 } 1147 1148 /* get primal dof index on this coarse edge */ 1149 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1150 if (gc[0] > gc[1]) { 1151 PetscInt swap = corners[2*i]; 1152 corners[2*i] = corners[2*i+1]; 1153 corners[2*i+1] = swap; 1154 } 1155 cedges[i] = idxs[size-1]; 1156 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1157 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1158 } 1159 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1160 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1161 1162 #if defined(PETSC_USE_DEBUG) 1163 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1164 not interfere with neighbouring coarse edges */ 1165 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1166 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1167 for (i=0;i<nv;i++) { 1168 PetscInt emax = 0,eemax = 0; 1169 1170 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1171 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1172 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1173 for (j=1;j<nee+1;j++) { 1174 if (emax < emarks[j]) { 1175 emax = emarks[j]; 1176 eemax = j; 1177 } 1178 } 1179 /* not relevant for edges */ 1180 if (!eemax) continue; 1181 1182 for (j=ii[i];j<ii[i+1];j++) { 1183 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1184 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1185 } 1186 } 1187 } 1188 ierr = PetscFree(emarks);CHKERRQ(ierr); 1189 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1190 #endif 1191 1192 /* Compute extended rows indices for edge blocks of the change of basis */ 1193 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1194 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1195 extmem *= maxsize; 1196 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1197 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1198 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1199 for (i=0;i<nv;i++) { 1200 PetscInt mark = 0,size,start; 1201 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1202 for (j=ii[i];j<ii[i+1];j++) 1203 if (marks[jj[j]] && !mark) 1204 mark = marks[jj[j]]; 1205 1206 /* not relevant */ 1207 if (!mark) continue; 1208 1209 /* import extended row */ 1210 mark--; 1211 start = mark*extmem+extrowcum[mark]; 1212 size = ii[i+1]-ii[i]; 1213 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1214 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1215 extrowcum[mark] += size; 1216 } 1217 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1218 cum = 0; 1219 for (i=0;i<nee;i++) { 1220 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1221 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1222 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1223 cum = PetscMax(cum,size); 1224 } 1225 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1226 ierr = PetscFree(marks);CHKERRQ(ierr); 1227 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1228 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1229 1230 /* Workspace for lapack inner calls and VecSetValues */ 1231 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1232 1233 /* Create change of basis matrix (preallocation can be improved) */ 1234 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1235 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1236 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1237 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1238 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1239 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1240 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1241 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1242 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1243 ierr = MatSetOption(T,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 1244 1245 /* Defaults to identity */ 1246 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1247 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1248 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1249 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1250 1251 /* Create discrete gradient for the coarser level if needed */ 1252 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1253 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1254 if (pcbddc->current_level < pcbddc->max_levels) { 1255 ISLocalToGlobalMapping cel2g,cvl2g; 1256 IS wis,gwis; 1257 PetscInt cnv,cne; 1258 1259 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1260 if (fl2g) { 1261 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1262 } else { 1263 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1264 pcbddc->nedclocal = wis; 1265 } 1266 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1267 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1268 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1269 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1270 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1271 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1272 1273 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1274 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1275 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1276 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1277 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1278 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1279 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1280 1281 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1282 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1283 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1284 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1285 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1286 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1287 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1288 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1289 } 1290 1291 #if defined(PRINT_GDET) 1292 inc = 0; 1293 lev = pcbddc->current_level; 1294 #endif 1295 for (i=0;i<nee;i++) { 1296 Mat Gins = NULL, GKins = NULL; 1297 IS cornersis = NULL; 1298 PetscScalar cvals[2]; 1299 1300 if (pcbddc->nedcG) { 1301 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1302 } 1303 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1304 if (Gins && GKins) { 1305 PetscScalar *data; 1306 const PetscInt *rows,*cols; 1307 PetscInt nrh,nch,nrc,ncc; 1308 1309 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1310 /* H1 */ 1311 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1312 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1313 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1314 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1315 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1316 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1317 /* complement */ 1318 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1319 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1320 if (ncc + nch != nrc) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d",ncc,nch,nrc); 1321 if (ncc != 1 && pcbddc->nedcG) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the dicrete gradient for the next level with ncc %d",ncc); 1322 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1323 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1324 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1325 1326 /* coarse discrete gradient */ 1327 if (pcbddc->nedcG) { 1328 PetscInt cols[2]; 1329 1330 cols[0] = 2*i; 1331 cols[1] = 2*i+1; 1332 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1333 } 1334 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1335 } 1336 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1337 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1338 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1339 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1340 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1341 } 1342 1343 /* Start assembling */ 1344 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1345 if (pcbddc->nedcG) { 1346 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1347 } 1348 1349 /* Free */ 1350 if (fl2g) { 1351 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1352 for (i=0;i<nee;i++) { 1353 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1354 } 1355 ierr = PetscFree(eedges);CHKERRQ(ierr); 1356 } 1357 1358 /* hack mat_graph with primal dofs on the coarse edges */ 1359 { 1360 PCBDDCGraph graph = pcbddc->mat_graph; 1361 PetscInt *oqueue = graph->queue; 1362 PetscInt *ocptr = graph->cptr; 1363 PetscInt ncc,*idxs; 1364 1365 /* find first primal edge */ 1366 if (pcbddc->nedclocal) { 1367 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1368 } else { 1369 if (fl2g) { 1370 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1371 } 1372 idxs = cedges; 1373 } 1374 cum = 0; 1375 while (cum < nee && cedges[cum] < 0) cum++; 1376 1377 /* adapt connected components */ 1378 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1379 graph->cptr[0] = 0; 1380 for (i=0,ncc=0;i<graph->ncc;i++) { 1381 PetscInt lc = ocptr[i+1]-ocptr[i]; 1382 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1383 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1384 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1385 ncc++; 1386 lc--; 1387 cum++; 1388 while (cum < nee && cedges[cum] < 0) cum++; 1389 } 1390 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1391 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1392 ncc++; 1393 } 1394 graph->ncc = ncc; 1395 if (pcbddc->nedclocal) { 1396 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1397 } 1398 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1399 } 1400 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1401 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1402 1403 1404 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1405 ierr = PetscFree(extrow);CHKERRQ(ierr); 1406 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1407 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1408 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1409 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1410 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1411 ierr = PetscFree(corners);CHKERRQ(ierr); 1412 ierr = PetscFree(cedges);CHKERRQ(ierr); 1413 ierr = PetscFree(extrows);CHKERRQ(ierr); 1414 ierr = PetscFree(extcols);CHKERRQ(ierr); 1415 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1416 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1417 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1418 1419 /* Complete assembling */ 1420 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1421 if (pcbddc->nedcG) { 1422 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1423 #if 0 1424 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1425 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1426 #endif 1427 } 1428 1429 /* set change of basis */ 1430 ierr = PCBDDCSetChangeOfBasisMat(pc,T,PETSC_FALSE);CHKERRQ(ierr); 1431 #if 0 1432 if (pcbddc->current_level) { 1433 PetscViewer viewer; 1434 char filename[256]; 1435 Mat Tned; 1436 IS sub; 1437 PetscInt rst; 1438 1439 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 1440 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 1441 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 1442 if (nedfieldlocal) { 1443 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 1444 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 1445 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 1446 } else { 1447 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 1448 } 1449 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 1450 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 1451 ierr = MatGetOwnershipRange(pc->pmat,&rst,NULL);CHKERRQ(ierr); 1452 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 1453 if (matis->sf_rootdata[i]) { 1454 matis->sf_rootdata[cum++] = i + rst; 1455 } 1456 } 1457 PetscPrintf(PETSC_COMM_SELF,"[%D] LEVEL %d MY ne %d cum %d\n",PetscGlobalRank,pcbddc->current_level,ne,cum); 1458 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cum,matis->sf_rootdata,PETSC_USE_POINTER,&sub);CHKERRQ(ierr); 1459 ierr = MatGetSubMatrix(T,sub,sub,MAT_INITIAL_MATRIX,&Tned);CHKERRQ(ierr); 1460 ierr = ISDestroy(&sub);CHKERRQ(ierr); 1461 1462 sprintf(filename,"Change_l%d.m",pcbddc->current_level); 1463 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)Tned),filename,&viewer);CHKERRQ(ierr); 1464 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1465 ierr = PetscObjectSetName((PetscObject)Tned,"T");CHKERRQ(ierr); 1466 ierr = MatView(Tned,viewer);CHKERRQ(ierr); 1467 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1468 ierr = MatDestroy(&Tned);CHKERRQ(ierr); 1469 } 1470 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1471 #endif 1472 ierr = MatDestroy(&T);CHKERRQ(ierr); 1473 1474 PetscFunctionReturn(0); 1475 } 1476 1477 /* the near-null space of BDDC carries information on quadrature weights, 1478 and these can be collinear -> so cheat with MatNullSpaceCreate 1479 and create a suitable set of basis vectors first */ 1480 #undef __FUNCT__ 1481 #define __FUNCT__ "PCBDDCNullSpaceCreate" 1482 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1483 { 1484 PetscErrorCode ierr; 1485 PetscInt i; 1486 1487 PetscFunctionBegin; 1488 for (i=0;i<nvecs;i++) { 1489 PetscInt first,last; 1490 1491 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1492 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1493 if (i>=first && i < last) { 1494 PetscScalar *data; 1495 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1496 if (!has_const) { 1497 data[i-first] = 1.; 1498 } else { 1499 data[2*i-first] = 1./PetscSqrtReal(2.); 1500 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1501 } 1502 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1503 } 1504 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1505 } 1506 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1507 for (i=0;i<nvecs;i++) { /* reset vectors */ 1508 PetscInt first,last; 1509 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1510 if (i>=first && i < last) { 1511 PetscScalar *data; 1512 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1513 if (!has_const) { 1514 data[i-first] = 0.; 1515 } else { 1516 data[2*i-first] = 0.; 1517 data[2*i-first+1] = 0.; 1518 } 1519 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1520 } 1521 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1522 } 1523 PetscFunctionReturn(0); 1524 } 1525 1526 #undef __FUNCT__ 1527 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 1528 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1529 { 1530 Mat loc_divudotp; 1531 Vec p,v,vins,quad_vec,*quad_vecs; 1532 ISLocalToGlobalMapping map; 1533 IS *faces,*edges; 1534 PetscScalar *vals; 1535 const PetscScalar *array; 1536 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1537 PetscMPIInt rank; 1538 PetscErrorCode ierr; 1539 1540 PetscFunctionBegin; 1541 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1542 if (graph->twodim) { 1543 lmaxneighs = 2; 1544 } else { 1545 lmaxneighs = 1; 1546 for (i=0;i<ne;i++) { 1547 const PetscInt *idxs; 1548 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1549 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1550 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1551 } 1552 lmaxneighs++; /* graph count does not include self */ 1553 } 1554 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1555 maxsize = 0; 1556 for (i=0;i<ne;i++) { 1557 PetscInt nn; 1558 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1559 maxsize = PetscMax(maxsize,nn); 1560 } 1561 for (i=0;i<nf;i++) { 1562 PetscInt nn; 1563 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1564 maxsize = PetscMax(maxsize,nn); 1565 } 1566 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1567 /* create vectors to hold quadrature weights */ 1568 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1569 if (!transpose) { 1570 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1571 } else { 1572 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1573 } 1574 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1575 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1576 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1577 for (i=0;i<maxneighs;i++) { 1578 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1579 } 1580 1581 /* compute local quad vec */ 1582 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1583 if (!transpose) { 1584 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1585 } else { 1586 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1587 } 1588 ierr = VecSet(p,1.);CHKERRQ(ierr); 1589 if (!transpose) { 1590 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1591 } else { 1592 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1593 } 1594 if (vl2l) { 1595 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1596 } else { 1597 vins = v; 1598 } 1599 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1600 ierr = VecDestroy(&p);CHKERRQ(ierr); 1601 1602 /* insert in global quadrature vecs */ 1603 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1604 for (i=0;i<nf;i++) { 1605 const PetscInt *idxs; 1606 PetscInt idx,nn,j; 1607 1608 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1609 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1610 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1611 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1612 idx = -(idx+1); 1613 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1614 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1615 } 1616 for (i=0;i<ne;i++) { 1617 const PetscInt *idxs; 1618 PetscInt idx,nn,j; 1619 1620 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1621 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1622 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1623 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1624 idx = -(idx+1); 1625 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1626 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1627 } 1628 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1629 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1630 if (vl2l) { 1631 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1632 } 1633 ierr = VecDestroy(&v);CHKERRQ(ierr); 1634 ierr = PetscFree(vals);CHKERRQ(ierr); 1635 1636 /* assemble near null space */ 1637 for (i=0;i<maxneighs;i++) { 1638 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1639 } 1640 for (i=0;i<maxneighs;i++) { 1641 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1642 } 1643 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1644 PetscFunctionReturn(0); 1645 } 1646 1647 1648 #undef __FUNCT__ 1649 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 1650 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1651 { 1652 PetscErrorCode ierr; 1653 Vec local,global; 1654 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1655 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1656 1657 PetscFunctionBegin; 1658 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1659 /* need to convert from global to local topology information and remove references to information in global ordering */ 1660 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1661 if (pcbddc->user_provided_isfordofs) { 1662 if (pcbddc->n_ISForDofs) { 1663 PetscInt i; 1664 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1665 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1666 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1667 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1668 } 1669 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1670 pcbddc->n_ISForDofs = 0; 1671 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1672 } 1673 } else { 1674 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1675 PetscInt i, n = matis->A->rmap->n; 1676 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1677 if (i > 1) { 1678 pcbddc->n_ISForDofsLocal = i; 1679 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1680 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1681 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1682 } 1683 } 1684 } 1685 } 1686 1687 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1688 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1689 } 1690 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1691 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1692 } 1693 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1694 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1695 } 1696 ierr = VecDestroy(&global);CHKERRQ(ierr); 1697 ierr = VecDestroy(&local);CHKERRQ(ierr); 1698 PetscFunctionReturn(0); 1699 } 1700 1701 #undef __FUNCT__ 1702 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 1703 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1704 { 1705 PC_IS *pcis = (PC_IS*)(pc->data); 1706 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1707 PetscErrorCode ierr; 1708 1709 PetscFunctionBegin; 1710 if (!pcbddc->benign_have_null) { 1711 PetscFunctionReturn(0); 1712 } 1713 if (pcbddc->ChangeOfBasisMatrix) { 1714 Vec swap; 1715 1716 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1717 swap = pcbddc->work_change; 1718 pcbddc->work_change = r; 1719 r = swap; 1720 } 1721 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1722 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1723 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1724 ierr = VecSet(z,0.);CHKERRQ(ierr); 1725 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1726 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1727 if (pcbddc->ChangeOfBasisMatrix) { 1728 pcbddc->work_change = r; 1729 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1730 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1731 } 1732 PetscFunctionReturn(0); 1733 } 1734 1735 #undef __FUNCT__ 1736 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 1737 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1738 { 1739 PCBDDCBenignMatMult_ctx ctx; 1740 PetscErrorCode ierr; 1741 PetscBool apply_right,apply_left,reset_x; 1742 1743 PetscFunctionBegin; 1744 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1745 if (transpose) { 1746 apply_right = ctx->apply_left; 1747 apply_left = ctx->apply_right; 1748 } else { 1749 apply_right = ctx->apply_right; 1750 apply_left = ctx->apply_left; 1751 } 1752 reset_x = PETSC_FALSE; 1753 if (apply_right) { 1754 const PetscScalar *ax; 1755 PetscInt nl,i; 1756 1757 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1758 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1759 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1760 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1761 for (i=0;i<ctx->benign_n;i++) { 1762 PetscScalar sum,val; 1763 const PetscInt *idxs; 1764 PetscInt nz,j; 1765 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1766 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1767 sum = 0.; 1768 if (ctx->apply_p0) { 1769 val = ctx->work[idxs[nz-1]]; 1770 for (j=0;j<nz-1;j++) { 1771 sum += ctx->work[idxs[j]]; 1772 ctx->work[idxs[j]] += val; 1773 } 1774 } else { 1775 for (j=0;j<nz-1;j++) { 1776 sum += ctx->work[idxs[j]]; 1777 } 1778 } 1779 ctx->work[idxs[nz-1]] -= sum; 1780 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1781 } 1782 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1783 reset_x = PETSC_TRUE; 1784 } 1785 if (transpose) { 1786 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1787 } else { 1788 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1789 } 1790 if (reset_x) { 1791 ierr = VecResetArray(x);CHKERRQ(ierr); 1792 } 1793 if (apply_left) { 1794 PetscScalar *ay; 1795 PetscInt i; 1796 1797 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1798 for (i=0;i<ctx->benign_n;i++) { 1799 PetscScalar sum,val; 1800 const PetscInt *idxs; 1801 PetscInt nz,j; 1802 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1803 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1804 val = -ay[idxs[nz-1]]; 1805 if (ctx->apply_p0) { 1806 sum = 0.; 1807 for (j=0;j<nz-1;j++) { 1808 sum += ay[idxs[j]]; 1809 ay[idxs[j]] += val; 1810 } 1811 ay[idxs[nz-1]] += sum; 1812 } else { 1813 for (j=0;j<nz-1;j++) { 1814 ay[idxs[j]] += val; 1815 } 1816 ay[idxs[nz-1]] = 0.; 1817 } 1818 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1819 } 1820 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1821 } 1822 PetscFunctionReturn(0); 1823 } 1824 1825 #undef __FUNCT__ 1826 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 1827 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1828 { 1829 PetscErrorCode ierr; 1830 1831 PetscFunctionBegin; 1832 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1833 PetscFunctionReturn(0); 1834 } 1835 1836 #undef __FUNCT__ 1837 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 1838 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1839 { 1840 PetscErrorCode ierr; 1841 1842 PetscFunctionBegin; 1843 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1844 PetscFunctionReturn(0); 1845 } 1846 1847 #undef __FUNCT__ 1848 #define __FUNCT__ "PCBDDCBenignShellMat" 1849 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1850 { 1851 PC_IS *pcis = (PC_IS*)pc->data; 1852 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1853 PCBDDCBenignMatMult_ctx ctx; 1854 PetscErrorCode ierr; 1855 1856 PetscFunctionBegin; 1857 if (!restore) { 1858 Mat A_IB,A_BI; 1859 PetscScalar *work; 1860 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1861 1862 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1863 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1864 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1865 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1866 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1867 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1868 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1869 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1870 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1871 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1872 ctx->apply_left = PETSC_TRUE; 1873 ctx->apply_right = PETSC_FALSE; 1874 ctx->apply_p0 = PETSC_FALSE; 1875 ctx->benign_n = pcbddc->benign_n; 1876 if (reuse) { 1877 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1878 ctx->free = PETSC_FALSE; 1879 } else { /* TODO: could be optimized for successive solves */ 1880 ISLocalToGlobalMapping N_to_D; 1881 PetscInt i; 1882 1883 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1884 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1885 for (i=0;i<pcbddc->benign_n;i++) { 1886 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1887 } 1888 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1889 ctx->free = PETSC_TRUE; 1890 } 1891 ctx->A = pcis->A_IB; 1892 ctx->work = work; 1893 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1894 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1895 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1896 pcis->A_IB = A_IB; 1897 1898 /* A_BI as A_IB^T */ 1899 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1900 pcbddc->benign_original_mat = pcis->A_BI; 1901 pcis->A_BI = A_BI; 1902 } else { 1903 if (!pcbddc->benign_original_mat) { 1904 PetscFunctionReturn(0); 1905 } 1906 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1907 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1908 pcis->A_IB = ctx->A; 1909 ctx->A = NULL; 1910 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1911 pcis->A_BI = pcbddc->benign_original_mat; 1912 pcbddc->benign_original_mat = NULL; 1913 if (ctx->free) { 1914 PetscInt i; 1915 for (i=0;i<ctx->benign_n;i++) { 1916 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1917 } 1918 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1919 } 1920 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1921 ierr = PetscFree(ctx);CHKERRQ(ierr); 1922 } 1923 PetscFunctionReturn(0); 1924 } 1925 1926 /* used just in bddc debug mode */ 1927 #undef __FUNCT__ 1928 #define __FUNCT__ "PCBDDCBenignProject" 1929 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1930 { 1931 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1932 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1933 Mat An; 1934 PetscErrorCode ierr; 1935 1936 PetscFunctionBegin; 1937 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1938 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1939 if (is1) { 1940 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1941 ierr = MatDestroy(&An);CHKERRQ(ierr); 1942 } else { 1943 *B = An; 1944 } 1945 PetscFunctionReturn(0); 1946 } 1947 1948 /* TODO: add reuse flag */ 1949 #undef __FUNCT__ 1950 #define __FUNCT__ "MatSeqAIJCompress" 1951 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1952 { 1953 Mat Bt; 1954 PetscScalar *a,*bdata; 1955 const PetscInt *ii,*ij; 1956 PetscInt m,n,i,nnz,*bii,*bij; 1957 PetscBool flg_row; 1958 PetscErrorCode ierr; 1959 1960 PetscFunctionBegin; 1961 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 1962 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1963 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 1964 nnz = n; 1965 for (i=0;i<ii[n];i++) { 1966 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 1967 } 1968 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 1969 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 1970 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 1971 nnz = 0; 1972 bii[0] = 0; 1973 for (i=0;i<n;i++) { 1974 PetscInt j; 1975 for (j=ii[i];j<ii[i+1];j++) { 1976 PetscScalar entry = a[j]; 1977 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 1978 bij[nnz] = ij[j]; 1979 bdata[nnz] = entry; 1980 nnz++; 1981 } 1982 } 1983 bii[i+1] = nnz; 1984 } 1985 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 1986 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 1987 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1988 { 1989 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 1990 b->free_a = PETSC_TRUE; 1991 b->free_ij = PETSC_TRUE; 1992 } 1993 *B = Bt; 1994 PetscFunctionReturn(0); 1995 } 1996 1997 #undef __FUNCT__ 1998 #define __FUNCT__ "MatDetectDisconnectedComponents" 1999 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 2000 { 2001 Mat B; 2002 IS is_dummy,*cc_n; 2003 ISLocalToGlobalMapping l2gmap_dummy; 2004 PCBDDCGraph graph; 2005 PetscInt i,n; 2006 PetscInt *xadj,*adjncy; 2007 PetscInt *xadj_filtered,*adjncy_filtered; 2008 PetscBool flg_row,isseqaij; 2009 PetscErrorCode ierr; 2010 2011 PetscFunctionBegin; 2012 if (!A->rmap->N || !A->cmap->N) { 2013 *ncc = 0; 2014 *cc = NULL; 2015 PetscFunctionReturn(0); 2016 } 2017 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2018 if (!isseqaij && filter) { 2019 PetscBool isseqdense; 2020 2021 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2022 if (!isseqdense) { 2023 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2024 } else { /* TODO: rectangular case and LDA */ 2025 PetscScalar *array; 2026 PetscReal chop=1.e-6; 2027 2028 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2029 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2030 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2031 for (i=0;i<n;i++) { 2032 PetscInt j; 2033 for (j=i+1;j<n;j++) { 2034 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2035 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2036 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2037 } 2038 } 2039 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2040 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2041 } 2042 } else { 2043 B = A; 2044 } 2045 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2046 2047 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2048 if (filter) { 2049 PetscScalar *data; 2050 PetscInt j,cum; 2051 2052 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2053 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2054 cum = 0; 2055 for (i=0;i<n;i++) { 2056 PetscInt t; 2057 2058 for (j=xadj[i];j<xadj[i+1];j++) { 2059 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2060 continue; 2061 } 2062 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2063 } 2064 t = xadj_filtered[i]; 2065 xadj_filtered[i] = cum; 2066 cum += t; 2067 } 2068 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2069 } else { 2070 xadj_filtered = NULL; 2071 adjncy_filtered = NULL; 2072 } 2073 2074 /* compute local connected components using PCBDDCGraph */ 2075 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2076 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2077 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2078 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2079 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2080 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2081 if (xadj_filtered) { 2082 graph->xadj = xadj_filtered; 2083 graph->adjncy = adjncy_filtered; 2084 } else { 2085 graph->xadj = xadj; 2086 graph->adjncy = adjncy; 2087 } 2088 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2089 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2090 /* partial clean up */ 2091 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2092 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2093 if (A != B) { 2094 ierr = MatDestroy(&B);CHKERRQ(ierr); 2095 } 2096 2097 /* get back data */ 2098 if (ncc) *ncc = graph->ncc; 2099 if (cc) { 2100 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2101 for (i=0;i<graph->ncc;i++) { 2102 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2103 } 2104 *cc = cc_n; 2105 } 2106 /* clean up graph */ 2107 graph->xadj = 0; 2108 graph->adjncy = 0; 2109 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2110 PetscFunctionReturn(0); 2111 } 2112 2113 #undef __FUNCT__ 2114 #define __FUNCT__ "PCBDDCBenignCheck" 2115 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2116 { 2117 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2118 PC_IS* pcis = (PC_IS*)(pc->data); 2119 IS dirIS = NULL; 2120 PetscInt i; 2121 PetscErrorCode ierr; 2122 2123 PetscFunctionBegin; 2124 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2125 if (zerodiag) { 2126 Mat A; 2127 Vec vec3_N; 2128 PetscScalar *vals; 2129 const PetscInt *idxs; 2130 PetscInt nz,*count; 2131 2132 /* p0 */ 2133 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2134 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2135 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2136 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2137 for (i=0;i<nz;i++) vals[i] = 1.; 2138 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2139 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2140 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2141 /* v_I */ 2142 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2143 for (i=0;i<nz;i++) vals[i] = 0.; 2144 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2145 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2146 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2147 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2148 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2149 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2150 if (dirIS) { 2151 PetscInt n; 2152 2153 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2154 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2155 for (i=0;i<n;i++) vals[i] = 0.; 2156 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2157 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2158 } 2159 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2160 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2161 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2162 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2163 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2164 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2165 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2166 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2167 ierr = PetscFree(vals);CHKERRQ(ierr); 2168 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2169 2170 /* there should not be any pressure dofs lying on the interface */ 2171 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2172 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2173 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2174 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2175 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2176 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2177 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2178 ierr = PetscFree(count);CHKERRQ(ierr); 2179 } 2180 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2181 2182 /* check PCBDDCBenignGetOrSetP0 */ 2183 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2184 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2185 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2186 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2187 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2188 for (i=0;i<pcbddc->benign_n;i++) { 2189 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2190 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 2191 } 2192 PetscFunctionReturn(0); 2193 } 2194 2195 #undef __FUNCT__ 2196 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 2197 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2198 { 2199 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2200 IS pressures,zerodiag,*zerodiag_subs; 2201 PetscInt nz,n; 2202 PetscInt *interior_dofs,n_interior_dofs; 2203 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 2204 PetscErrorCode ierr; 2205 2206 PetscFunctionBegin; 2207 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2208 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2209 for (n=0;n<pcbddc->benign_n;n++) { 2210 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2211 } 2212 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2213 pcbddc->benign_n = 0; 2214 /* if a local info on dofs is present, assumes that the last field represents "pressures" 2215 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2216 Checks if all the pressure dofs in each subdomain have a zero diagonal 2217 If not, a change of basis on pressures is not needed 2218 since the local Schur complements are already SPD 2219 */ 2220 has_null_pressures = PETSC_TRUE; 2221 have_null = PETSC_TRUE; 2222 if (pcbddc->n_ISForDofsLocal) { 2223 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2224 2225 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2226 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2227 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2228 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2229 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2230 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2231 if (!sorted) { 2232 ierr = ISSort(pressures);CHKERRQ(ierr); 2233 } 2234 } else { 2235 pressures = NULL; 2236 } 2237 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2238 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2239 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2240 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2241 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2242 if (!sorted) { 2243 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2244 } 2245 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2246 if (!nz) { 2247 if (n) have_null = PETSC_FALSE; 2248 has_null_pressures = PETSC_FALSE; 2249 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2250 } 2251 recompute_zerodiag = PETSC_FALSE; 2252 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2253 zerodiag_subs = NULL; 2254 pcbddc->benign_n = 0; 2255 n_interior_dofs = 0; 2256 interior_dofs = NULL; 2257 if (pcbddc->current_level) { /* need to compute interior nodes */ 2258 PetscInt n,i,j; 2259 PetscInt n_neigh,*neigh,*n_shared,**shared; 2260 PetscInt *iwork; 2261 2262 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2263 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2264 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2265 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2266 for (i=1;i<n_neigh;i++) 2267 for (j=0;j<n_shared[i];j++) 2268 iwork[shared[i][j]] += 1; 2269 for (i=0;i<n;i++) 2270 if (!iwork[i]) 2271 interior_dofs[n_interior_dofs++] = i; 2272 ierr = PetscFree(iwork);CHKERRQ(ierr); 2273 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2274 } 2275 if (has_null_pressures) { 2276 IS *subs; 2277 PetscInt nsubs,i,j,nl; 2278 const PetscInt *idxs; 2279 PetscScalar *array; 2280 Vec *work; 2281 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2282 2283 subs = pcbddc->local_subs; 2284 nsubs = pcbddc->n_local_subs; 2285 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2286 if (pcbddc->current_level) { 2287 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2288 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2289 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2290 /* work[0] = 1_p */ 2291 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2292 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2293 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2294 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2295 /* work[0] = 1_v */ 2296 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2297 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2298 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2299 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2300 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2301 } 2302 if (nsubs > 1) { 2303 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2304 for (i=0;i<nsubs;i++) { 2305 ISLocalToGlobalMapping l2g; 2306 IS t_zerodiag_subs; 2307 PetscInt nl; 2308 2309 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2310 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2311 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2312 if (nl) { 2313 PetscBool valid = PETSC_TRUE; 2314 2315 if (pcbddc->current_level) { 2316 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2317 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2318 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2319 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2320 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2321 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2322 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2323 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2324 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2325 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2326 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2327 for (j=0;j<n_interior_dofs;j++) { 2328 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2329 valid = PETSC_FALSE; 2330 break; 2331 } 2332 } 2333 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2334 } 2335 if (valid && pcbddc->NeumannBoundariesLocal) { 2336 IS t_bc; 2337 PetscInt nzb; 2338 2339 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 2340 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 2341 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 2342 if (nzb) valid = PETSC_FALSE; 2343 } 2344 if (valid && pressures) { 2345 IS t_pressure_subs; 2346 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2347 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2348 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2349 } 2350 if (valid) { 2351 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2352 pcbddc->benign_n++; 2353 } else { 2354 recompute_zerodiag = PETSC_TRUE; 2355 } 2356 } 2357 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2358 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2359 } 2360 } else { /* there's just one subdomain (or zero if they have not been detected */ 2361 PetscBool valid = PETSC_TRUE; 2362 2363 if (pcbddc->NeumannBoundariesLocal) { 2364 PetscInt nzb; 2365 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 2366 if (nzb) valid = PETSC_FALSE; 2367 } 2368 if (valid && pressures) { 2369 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2370 } 2371 if (valid && pcbddc->current_level) { 2372 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2373 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2374 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2375 for (j=0;j<n_interior_dofs;j++) { 2376 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2377 valid = PETSC_FALSE; 2378 break; 2379 } 2380 } 2381 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2382 } 2383 if (valid) { 2384 pcbddc->benign_n = 1; 2385 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2386 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2387 zerodiag_subs[0] = zerodiag; 2388 } 2389 } 2390 if (pcbddc->current_level) { 2391 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2392 } 2393 } 2394 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2395 2396 if (!pcbddc->benign_n) { 2397 PetscInt n; 2398 2399 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2400 recompute_zerodiag = PETSC_FALSE; 2401 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2402 if (n) { 2403 has_null_pressures = PETSC_FALSE; 2404 have_null = PETSC_FALSE; 2405 } 2406 } 2407 2408 /* final check for null pressures */ 2409 if (zerodiag && pressures) { 2410 PetscInt nz,np; 2411 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2412 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2413 if (nz != np) have_null = PETSC_FALSE; 2414 } 2415 2416 if (recompute_zerodiag) { 2417 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2418 if (pcbddc->benign_n == 1) { 2419 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2420 zerodiag = zerodiag_subs[0]; 2421 } else { 2422 PetscInt i,nzn,*new_idxs; 2423 2424 nzn = 0; 2425 for (i=0;i<pcbddc->benign_n;i++) { 2426 PetscInt ns; 2427 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2428 nzn += ns; 2429 } 2430 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2431 nzn = 0; 2432 for (i=0;i<pcbddc->benign_n;i++) { 2433 PetscInt ns,*idxs; 2434 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2435 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2436 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2437 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2438 nzn += ns; 2439 } 2440 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2441 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2442 } 2443 have_null = PETSC_FALSE; 2444 } 2445 2446 /* Prepare matrix to compute no-net-flux */ 2447 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2448 Mat A,loc_divudotp; 2449 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2450 IS row,col,isused = NULL; 2451 PetscInt M,N,n,st,n_isused; 2452 2453 if (pressures) { 2454 isused = pressures; 2455 } else { 2456 isused = zerodiag; 2457 } 2458 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2459 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2460 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2461 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2462 n_isused = 0; 2463 if (isused) { 2464 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2465 } 2466 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2467 st = st-n_isused; 2468 if (n) { 2469 const PetscInt *gidxs; 2470 2471 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2472 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2473 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2474 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2475 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2476 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2477 } else { 2478 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2479 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2480 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2481 } 2482 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2483 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2484 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2485 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2486 ierr = ISDestroy(&row);CHKERRQ(ierr); 2487 ierr = ISDestroy(&col);CHKERRQ(ierr); 2488 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2489 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2490 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2491 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2492 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2493 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2494 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2495 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2496 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2497 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2498 } 2499 2500 /* change of basis and p0 dofs */ 2501 if (has_null_pressures) { 2502 IS zerodiagc; 2503 const PetscInt *idxs,*idxsc; 2504 PetscInt i,s,*nnz; 2505 2506 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2507 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2508 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2509 /* local change of basis for pressures */ 2510 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2511 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2512 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2513 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2514 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2515 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2516 for (i=0;i<pcbddc->benign_n;i++) { 2517 PetscInt nzs,j; 2518 2519 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2520 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2521 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2522 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2523 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2524 } 2525 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2526 ierr = PetscFree(nnz);CHKERRQ(ierr); 2527 /* set identity on velocities */ 2528 for (i=0;i<n-nz;i++) { 2529 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2530 } 2531 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2532 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2533 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2534 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2535 /* set change on pressures */ 2536 for (s=0;s<pcbddc->benign_n;s++) { 2537 PetscScalar *array; 2538 PetscInt nzs; 2539 2540 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2541 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2542 for (i=0;i<nzs-1;i++) { 2543 PetscScalar vals[2]; 2544 PetscInt cols[2]; 2545 2546 cols[0] = idxs[i]; 2547 cols[1] = idxs[nzs-1]; 2548 vals[0] = 1.; 2549 vals[1] = 1.; 2550 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2551 } 2552 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2553 for (i=0;i<nzs-1;i++) array[i] = -1.; 2554 array[nzs-1] = 1.; 2555 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2556 /* store local idxs for p0 */ 2557 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2558 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2559 ierr = PetscFree(array);CHKERRQ(ierr); 2560 } 2561 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2562 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2563 /* project if needed */ 2564 if (pcbddc->benign_change_explicit) { 2565 Mat M; 2566 2567 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2568 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2569 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2570 ierr = MatDestroy(&M);CHKERRQ(ierr); 2571 } 2572 /* store global idxs for p0 */ 2573 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2574 } 2575 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2576 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2577 2578 /* determines if the coarse solver will be singular or not */ 2579 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2580 /* determines if the problem has subdomains with 0 pressure block */ 2581 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2582 *zerodiaglocal = zerodiag; 2583 PetscFunctionReturn(0); 2584 } 2585 2586 #undef __FUNCT__ 2587 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 2588 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2589 { 2590 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2591 PetscScalar *array; 2592 PetscErrorCode ierr; 2593 2594 PetscFunctionBegin; 2595 if (!pcbddc->benign_sf) { 2596 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2597 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2598 } 2599 if (get) { 2600 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2601 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2602 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2603 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2604 } else { 2605 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2606 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2607 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2608 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2609 } 2610 PetscFunctionReturn(0); 2611 } 2612 2613 #undef __FUNCT__ 2614 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 2615 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2616 { 2617 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2618 PetscErrorCode ierr; 2619 2620 PetscFunctionBegin; 2621 /* TODO: add error checking 2622 - avoid nested pop (or push) calls. 2623 - cannot push before pop. 2624 - cannot call this if pcbddc->local_mat is NULL 2625 */ 2626 if (!pcbddc->benign_n) { 2627 PetscFunctionReturn(0); 2628 } 2629 if (pop) { 2630 if (pcbddc->benign_change_explicit) { 2631 IS is_p0; 2632 MatReuse reuse; 2633 2634 /* extract B_0 */ 2635 reuse = MAT_INITIAL_MATRIX; 2636 if (pcbddc->benign_B0) { 2637 reuse = MAT_REUSE_MATRIX; 2638 } 2639 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2640 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2641 /* remove rows and cols from local problem */ 2642 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2643 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2644 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2645 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2646 } else { 2647 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2648 PetscScalar *vals; 2649 PetscInt i,n,*idxs_ins; 2650 2651 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2652 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2653 if (!pcbddc->benign_B0) { 2654 PetscInt *nnz; 2655 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2656 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2657 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2658 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2659 for (i=0;i<pcbddc->benign_n;i++) { 2660 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2661 nnz[i] = n - nnz[i]; 2662 } 2663 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2664 ierr = PetscFree(nnz);CHKERRQ(ierr); 2665 } 2666 2667 for (i=0;i<pcbddc->benign_n;i++) { 2668 PetscScalar *array; 2669 PetscInt *idxs,j,nz,cum; 2670 2671 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2672 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2673 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2674 for (j=0;j<nz;j++) vals[j] = 1.; 2675 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2676 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2677 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2678 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2679 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2680 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2681 cum = 0; 2682 for (j=0;j<n;j++) { 2683 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2684 vals[cum] = array[j]; 2685 idxs_ins[cum] = j; 2686 cum++; 2687 } 2688 } 2689 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2690 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2691 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2692 } 2693 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2694 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2695 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2696 } 2697 } else { /* push */ 2698 if (pcbddc->benign_change_explicit) { 2699 PetscInt i; 2700 2701 for (i=0;i<pcbddc->benign_n;i++) { 2702 PetscScalar *B0_vals; 2703 PetscInt *B0_cols,B0_ncol; 2704 2705 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2706 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2707 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2708 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2709 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2710 } 2711 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2712 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2713 } else { 2714 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2715 } 2716 } 2717 PetscFunctionReturn(0); 2718 } 2719 2720 #undef __FUNCT__ 2721 #define __FUNCT__ "PCBDDCAdaptiveSelection" 2722 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2723 { 2724 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2725 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2726 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2727 PetscBLASInt *B_iwork,*B_ifail; 2728 PetscScalar *work,lwork; 2729 PetscScalar *St,*S,*eigv; 2730 PetscScalar *Sarray,*Starray; 2731 PetscReal *eigs,thresh; 2732 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2733 PetscBool allocated_S_St; 2734 #if defined(PETSC_USE_COMPLEX) 2735 PetscReal *rwork; 2736 #endif 2737 PetscErrorCode ierr; 2738 2739 PetscFunctionBegin; 2740 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2741 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2742 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2743 2744 if (pcbddc->dbg_flag) { 2745 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2746 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2747 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2748 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2749 } 2750 2751 if (pcbddc->dbg_flag) { 2752 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2753 } 2754 2755 /* max size of subsets */ 2756 mss = 0; 2757 for (i=0;i<sub_schurs->n_subs;i++) { 2758 PetscInt subset_size; 2759 2760 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2761 mss = PetscMax(mss,subset_size); 2762 } 2763 2764 /* min/max and threshold */ 2765 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2766 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2767 nmax = PetscMax(nmin,nmax); 2768 allocated_S_St = PETSC_FALSE; 2769 if (nmin) { 2770 allocated_S_St = PETSC_TRUE; 2771 } 2772 2773 /* allocate lapack workspace */ 2774 cum = cum2 = 0; 2775 maxneigs = 0; 2776 for (i=0;i<sub_schurs->n_subs;i++) { 2777 PetscInt n,subset_size; 2778 2779 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2780 n = PetscMin(subset_size,nmax); 2781 cum += subset_size; 2782 cum2 += subset_size*n; 2783 maxneigs = PetscMax(maxneigs,n); 2784 } 2785 if (mss) { 2786 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2787 PetscBLASInt B_itype = 1; 2788 PetscBLASInt B_N = mss; 2789 PetscReal zero = 0.0; 2790 PetscReal eps = 0.0; /* dlamch? */ 2791 2792 B_lwork = -1; 2793 S = NULL; 2794 St = NULL; 2795 eigs = NULL; 2796 eigv = NULL; 2797 B_iwork = NULL; 2798 B_ifail = NULL; 2799 #if defined(PETSC_USE_COMPLEX) 2800 rwork = NULL; 2801 #endif 2802 thresh = 1.0; 2803 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2804 #if defined(PETSC_USE_COMPLEX) 2805 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2806 #else 2807 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2808 #endif 2809 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2810 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2811 } else { 2812 /* TODO */ 2813 } 2814 } else { 2815 lwork = 0; 2816 } 2817 2818 nv = 0; 2819 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2820 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2821 } 2822 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2823 if (allocated_S_St) { 2824 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2825 } 2826 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2827 #if defined(PETSC_USE_COMPLEX) 2828 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2829 #endif 2830 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2831 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2832 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2833 nv+cum,&pcbddc->adaptive_constraints_idxs, 2834 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2835 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2836 2837 maxneigs = 0; 2838 cum = cumarray = 0; 2839 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2840 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2841 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2842 const PetscInt *idxs; 2843 2844 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2845 for (cum=0;cum<nv;cum++) { 2846 pcbddc->adaptive_constraints_n[cum] = 1; 2847 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2848 pcbddc->adaptive_constraints_data[cum] = 1.0; 2849 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2850 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2851 } 2852 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2853 } 2854 2855 if (mss) { /* multilevel */ 2856 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2857 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2858 } 2859 2860 thresh = pcbddc->adaptive_threshold; 2861 for (i=0;i<sub_schurs->n_subs;i++) { 2862 const PetscInt *idxs; 2863 PetscReal upper,lower; 2864 PetscInt j,subset_size,eigs_start = 0; 2865 PetscBLASInt B_N; 2866 PetscBool same_data = PETSC_FALSE; 2867 2868 if (pcbddc->use_deluxe_scaling) { 2869 upper = PETSC_MAX_REAL; 2870 lower = thresh; 2871 } else { 2872 upper = 1./thresh; 2873 lower = 0.; 2874 } 2875 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2876 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2877 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2878 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2879 if (sub_schurs->is_hermitian) { 2880 PetscInt j,k; 2881 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2882 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2883 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2884 } 2885 for (j=0;j<subset_size;j++) { 2886 for (k=j;k<subset_size;k++) { 2887 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2888 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2889 } 2890 } 2891 } else { 2892 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2893 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2894 } 2895 } else { 2896 S = Sarray + cumarray; 2897 St = Starray + cumarray; 2898 } 2899 /* see if we can save some work */ 2900 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2901 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2902 } 2903 2904 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2905 B_neigs = 0; 2906 } else { 2907 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2908 PetscBLASInt B_itype = 1; 2909 PetscBLASInt B_IL, B_IU; 2910 PetscReal eps = -1.0; /* dlamch? */ 2911 PetscInt nmin_s; 2912 PetscBool compute_range = PETSC_FALSE; 2913 2914 if (pcbddc->dbg_flag) { 2915 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]]); 2916 } 2917 2918 compute_range = PETSC_FALSE; 2919 if (thresh > 1.+PETSC_SMALL && !same_data) { 2920 compute_range = PETSC_TRUE; 2921 } 2922 2923 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2924 if (compute_range) { 2925 2926 /* ask for eigenvalues larger than thresh */ 2927 #if defined(PETSC_USE_COMPLEX) 2928 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2929 #else 2930 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2931 #endif 2932 } else if (!same_data) { 2933 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2934 B_IL = 1; 2935 #if defined(PETSC_USE_COMPLEX) 2936 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2937 #else 2938 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2939 #endif 2940 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 2941 PetscInt k; 2942 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2943 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2944 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2945 nmin = nmax; 2946 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2947 for (k=0;k<nmax;k++) { 2948 eigs[k] = 1./PETSC_SMALL; 2949 eigv[k*(subset_size+1)] = 1.0; 2950 } 2951 } 2952 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2953 if (B_ierr) { 2954 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2955 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2956 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2957 } 2958 2959 if (B_neigs > nmax) { 2960 if (pcbddc->dbg_flag) { 2961 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 2962 } 2963 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 2964 B_neigs = nmax; 2965 } 2966 2967 nmin_s = PetscMin(nmin,B_N); 2968 if (B_neigs < nmin_s) { 2969 PetscBLASInt B_neigs2; 2970 2971 if (pcbddc->use_deluxe_scaling) { 2972 B_IL = B_N - nmin_s + 1; 2973 B_IU = B_N - B_neigs; 2974 } else { 2975 B_IL = B_neigs + 1; 2976 B_IU = nmin_s; 2977 } 2978 if (pcbddc->dbg_flag) { 2979 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 2980 } 2981 if (sub_schurs->is_hermitian) { 2982 PetscInt j,k; 2983 for (j=0;j<subset_size;j++) { 2984 for (k=j;k<subset_size;k++) { 2985 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2986 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2987 } 2988 } 2989 } else { 2990 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2991 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2992 } 2993 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2994 #if defined(PETSC_USE_COMPLEX) 2995 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2996 #else 2997 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2998 #endif 2999 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3000 B_neigs += B_neigs2; 3001 } 3002 if (B_ierr) { 3003 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3004 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3005 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3006 } 3007 if (pcbddc->dbg_flag) { 3008 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3009 for (j=0;j<B_neigs;j++) { 3010 if (eigs[j] == 0.0) { 3011 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3012 } else { 3013 if (pcbddc->use_deluxe_scaling) { 3014 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3015 } else { 3016 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3017 } 3018 } 3019 } 3020 } 3021 } else { 3022 /* TODO */ 3023 } 3024 } 3025 /* change the basis back to the original one */ 3026 if (sub_schurs->change) { 3027 Mat change,phi,phit; 3028 3029 if (pcbddc->dbg_flag > 1) { 3030 PetscInt ii; 3031 for (ii=0;ii<B_neigs;ii++) { 3032 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3033 for (j=0;j<B_N;j++) { 3034 #if defined(PETSC_USE_COMPLEX) 3035 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3036 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3037 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3038 #else 3039 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3040 #endif 3041 } 3042 } 3043 } 3044 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3045 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3046 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3047 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3048 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3049 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3050 } 3051 maxneigs = PetscMax(B_neigs,maxneigs); 3052 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3053 if (B_neigs) { 3054 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3055 3056 if (pcbddc->dbg_flag > 1) { 3057 PetscInt ii; 3058 for (ii=0;ii<B_neigs;ii++) { 3059 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3060 for (j=0;j<B_N;j++) { 3061 #if defined(PETSC_USE_COMPLEX) 3062 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3063 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3064 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3065 #else 3066 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3067 #endif 3068 } 3069 } 3070 } 3071 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3072 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3073 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3074 cum++; 3075 } 3076 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3077 /* shift for next computation */ 3078 cumarray += subset_size*subset_size; 3079 } 3080 if (pcbddc->dbg_flag) { 3081 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3082 } 3083 3084 if (mss) { 3085 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3086 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3087 /* destroy matrices (junk) */ 3088 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3089 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3090 } 3091 if (allocated_S_St) { 3092 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3093 } 3094 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3095 #if defined(PETSC_USE_COMPLEX) 3096 ierr = PetscFree(rwork);CHKERRQ(ierr); 3097 #endif 3098 if (pcbddc->dbg_flag) { 3099 PetscInt maxneigs_r; 3100 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3101 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3102 } 3103 PetscFunctionReturn(0); 3104 } 3105 3106 #undef __FUNCT__ 3107 #define __FUNCT__ "PCBDDCSetUpSolvers" 3108 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3109 { 3110 PetscScalar *coarse_submat_vals; 3111 PetscErrorCode ierr; 3112 3113 PetscFunctionBegin; 3114 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3115 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3116 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3117 3118 /* Setup local neumann solver ksp_R */ 3119 /* PCBDDCSetUpLocalScatters should be called first! */ 3120 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3121 3122 /* 3123 Setup local correction and local part of coarse basis. 3124 Gives back the dense local part of the coarse matrix in column major ordering 3125 */ 3126 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3127 3128 /* Compute total number of coarse nodes and setup coarse solver */ 3129 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3130 3131 /* free */ 3132 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3133 PetscFunctionReturn(0); 3134 } 3135 3136 #undef __FUNCT__ 3137 #define __FUNCT__ "PCBDDCResetCustomization" 3138 PetscErrorCode PCBDDCResetCustomization(PC pc) 3139 { 3140 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3141 PetscErrorCode ierr; 3142 3143 PetscFunctionBegin; 3144 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3145 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3146 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3147 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3148 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3149 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3150 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3151 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3152 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3153 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3154 PetscFunctionReturn(0); 3155 } 3156 3157 #undef __FUNCT__ 3158 #define __FUNCT__ "PCBDDCResetTopography" 3159 PetscErrorCode PCBDDCResetTopography(PC pc) 3160 { 3161 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3162 PetscInt i; 3163 PetscErrorCode ierr; 3164 3165 PetscFunctionBegin; 3166 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3167 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3168 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3169 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3170 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3171 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3172 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3173 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3174 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3175 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3176 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3177 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3178 for (i=0;i<pcbddc->n_local_subs;i++) { 3179 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3180 } 3181 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3182 if (pcbddc->sub_schurs) { 3183 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 3184 } 3185 pcbddc->graphanalyzed = PETSC_FALSE; 3186 pcbddc->recompute_topography = PETSC_TRUE; 3187 PetscFunctionReturn(0); 3188 } 3189 3190 #undef __FUNCT__ 3191 #define __FUNCT__ "PCBDDCResetSolvers" 3192 PetscErrorCode PCBDDCResetSolvers(PC pc) 3193 { 3194 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3195 PetscErrorCode ierr; 3196 3197 PetscFunctionBegin; 3198 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3199 if (pcbddc->coarse_phi_B) { 3200 PetscScalar *array; 3201 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3202 ierr = PetscFree(array);CHKERRQ(ierr); 3203 } 3204 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3205 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3206 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3207 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3208 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3209 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3210 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3211 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3212 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3213 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3214 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3215 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3216 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3217 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3218 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 3219 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 3220 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 3221 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3222 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3223 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3224 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3225 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3226 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3227 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3228 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3229 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3230 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3231 if (pcbddc->benign_zerodiag_subs) { 3232 PetscInt i; 3233 for (i=0;i<pcbddc->benign_n;i++) { 3234 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3235 } 3236 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3237 } 3238 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3239 PetscFunctionReturn(0); 3240 } 3241 3242 #undef __FUNCT__ 3243 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 3244 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3245 { 3246 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3247 PC_IS *pcis = (PC_IS*)pc->data; 3248 VecType impVecType; 3249 PetscInt n_constraints,n_R,old_size; 3250 PetscErrorCode ierr; 3251 3252 PetscFunctionBegin; 3253 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3254 n_R = pcis->n - pcbddc->n_vertices; 3255 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3256 /* local work vectors (try to avoid unneeded work)*/ 3257 /* R nodes */ 3258 old_size = -1; 3259 if (pcbddc->vec1_R) { 3260 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3261 } 3262 if (n_R != old_size) { 3263 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3264 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3265 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3266 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3267 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3268 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3269 } 3270 /* local primal dofs */ 3271 old_size = -1; 3272 if (pcbddc->vec1_P) { 3273 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3274 } 3275 if (pcbddc->local_primal_size != old_size) { 3276 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3277 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3278 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3279 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3280 } 3281 /* local explicit constraints */ 3282 old_size = -1; 3283 if (pcbddc->vec1_C) { 3284 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3285 } 3286 if (n_constraints && n_constraints != old_size) { 3287 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3288 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3289 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3290 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3291 } 3292 PetscFunctionReturn(0); 3293 } 3294 3295 #undef __FUNCT__ 3296 #define __FUNCT__ "PCBDDCSetUpCorrection" 3297 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3298 { 3299 PetscErrorCode ierr; 3300 /* pointers to pcis and pcbddc */ 3301 PC_IS* pcis = (PC_IS*)pc->data; 3302 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3303 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3304 /* submatrices of local problem */ 3305 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3306 /* submatrices of local coarse problem */ 3307 Mat S_VV,S_CV,S_VC,S_CC; 3308 /* working matrices */ 3309 Mat C_CR; 3310 /* additional working stuff */ 3311 PC pc_R; 3312 Mat F; 3313 Vec dummy_vec; 3314 PetscBool isLU,isCHOL,isILU,need_benign_correction; 3315 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3316 PetscScalar *work; 3317 PetscInt *idx_V_B; 3318 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3319 PetscInt i,n_R,n_D,n_B; 3320 3321 /* some shortcuts to scalars */ 3322 PetscScalar one=1.0,m_one=-1.0; 3323 3324 PetscFunctionBegin; 3325 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3326 3327 /* Set Non-overlapping dimensions */ 3328 n_vertices = pcbddc->n_vertices; 3329 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3330 n_B = pcis->n_B; 3331 n_D = pcis->n - n_B; 3332 n_R = pcis->n - n_vertices; 3333 3334 /* vertices in boundary numbering */ 3335 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3336 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3337 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3338 3339 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3340 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3341 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3342 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3343 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3344 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3345 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3346 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3347 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3348 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3349 3350 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3351 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3352 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3353 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3354 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3355 lda_rhs = n_R; 3356 need_benign_correction = PETSC_FALSE; 3357 if (isLU || isILU || isCHOL) { 3358 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3359 } else if (sub_schurs && sub_schurs->reuse_solver) { 3360 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3361 MatFactorType type; 3362 3363 F = reuse_solver->F; 3364 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3365 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3366 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3367 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3368 } else { 3369 F = NULL; 3370 } 3371 3372 /* allocate workspace */ 3373 n = 0; 3374 if (n_constraints) { 3375 n += lda_rhs*n_constraints; 3376 } 3377 if (n_vertices) { 3378 n = PetscMax(2*lda_rhs*n_vertices,n); 3379 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3380 } 3381 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3382 3383 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3384 dummy_vec = NULL; 3385 if (need_benign_correction && lda_rhs != n_R && F) { 3386 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3387 } 3388 3389 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3390 if (n_constraints) { 3391 Mat M1,M2,M3,C_B; 3392 IS is_aux; 3393 PetscScalar *array,*array2; 3394 3395 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3396 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3397 3398 /* Extract constraints on R nodes: C_{CR} */ 3399 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3400 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3401 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3402 3403 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3404 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3405 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3406 for (i=0;i<n_constraints;i++) { 3407 const PetscScalar *row_cmat_values; 3408 const PetscInt *row_cmat_indices; 3409 PetscInt size_of_constraint,j; 3410 3411 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3412 for (j=0;j<size_of_constraint;j++) { 3413 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3414 } 3415 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3416 } 3417 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3418 if (F) { 3419 Mat B; 3420 3421 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3422 if (need_benign_correction) { 3423 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3424 3425 /* rhs is already zero on interior dofs, no need to change the rhs */ 3426 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3427 } 3428 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 3429 if (need_benign_correction) { 3430 PetscScalar *marr; 3431 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3432 3433 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3434 if (lda_rhs != n_R) { 3435 for (i=0;i<n_constraints;i++) { 3436 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3437 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3438 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3439 } 3440 } else { 3441 for (i=0;i<n_constraints;i++) { 3442 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3443 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3444 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3445 } 3446 } 3447 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3448 } 3449 ierr = MatDestroy(&B);CHKERRQ(ierr); 3450 } else { 3451 PetscScalar *marr; 3452 3453 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3454 for (i=0;i<n_constraints;i++) { 3455 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3456 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3457 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3458 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3459 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3460 } 3461 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3462 } 3463 if (!pcbddc->switch_static) { 3464 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3465 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3466 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3467 for (i=0;i<n_constraints;i++) { 3468 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3469 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3470 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3471 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3472 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3473 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3474 } 3475 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3476 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3477 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3478 } else { 3479 if (lda_rhs != n_R) { 3480 IS dummy; 3481 3482 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3483 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3484 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3485 } else { 3486 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3487 pcbddc->local_auxmat2 = local_auxmat2_R; 3488 } 3489 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3490 } 3491 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3492 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3493 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3494 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3495 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3496 if (isCHOL) { 3497 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3498 } else { 3499 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3500 } 3501 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3502 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3503 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3504 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3505 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3506 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3507 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3508 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3509 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3510 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3511 } 3512 3513 /* Get submatrices from subdomain matrix */ 3514 if (n_vertices) { 3515 IS is_aux; 3516 3517 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3518 IS tis; 3519 3520 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3521 ierr = ISSort(tis);CHKERRQ(ierr); 3522 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3523 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3524 } else { 3525 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3526 } 3527 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3528 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3529 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3530 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3531 } 3532 3533 /* Matrix of coarse basis functions (local) */ 3534 if (pcbddc->coarse_phi_B) { 3535 PetscInt on_B,on_primal,on_D=n_D; 3536 if (pcbddc->coarse_phi_D) { 3537 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3538 } 3539 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3540 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3541 PetscScalar *marray; 3542 3543 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3544 ierr = PetscFree(marray);CHKERRQ(ierr); 3545 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3546 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3547 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3548 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3549 } 3550 } 3551 3552 if (!pcbddc->coarse_phi_B) { 3553 PetscScalar *marray; 3554 3555 n = n_B*pcbddc->local_primal_size; 3556 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3557 n += n_D*pcbddc->local_primal_size; 3558 } 3559 if (!pcbddc->symmetric_primal) { 3560 n *= 2; 3561 } 3562 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 3563 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3564 n = n_B*pcbddc->local_primal_size; 3565 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3566 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3567 n += n_D*pcbddc->local_primal_size; 3568 } 3569 if (!pcbddc->symmetric_primal) { 3570 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3571 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3572 n = n_B*pcbddc->local_primal_size; 3573 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3574 } 3575 } else { 3576 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3577 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3578 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3579 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3580 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3581 } 3582 } 3583 } 3584 3585 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3586 p0_lidx_I = NULL; 3587 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3588 const PetscInt *idxs; 3589 3590 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3591 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3592 for (i=0;i<pcbddc->benign_n;i++) { 3593 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3594 } 3595 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3596 } 3597 3598 /* vertices */ 3599 if (n_vertices) { 3600 3601 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3602 3603 if (n_R) { 3604 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3605 PetscBLASInt B_N,B_one = 1; 3606 PetscScalar *x,*y; 3607 PetscBool isseqaij; 3608 3609 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3610 if (need_benign_correction) { 3611 ISLocalToGlobalMapping RtoN; 3612 IS is_p0; 3613 PetscInt *idxs_p0,n; 3614 3615 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3616 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3617 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3618 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3619 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3620 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3621 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3622 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3623 } 3624 3625 if (lda_rhs == n_R) { 3626 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3627 } else { 3628 PetscScalar *av,*array; 3629 const PetscInt *xadj,*adjncy; 3630 PetscInt n; 3631 PetscBool flg_row; 3632 3633 array = work+lda_rhs*n_vertices; 3634 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3635 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3636 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3637 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3638 for (i=0;i<n;i++) { 3639 PetscInt j; 3640 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3641 } 3642 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3643 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3644 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3645 } 3646 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3647 if (need_benign_correction) { 3648 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3649 PetscScalar *marr; 3650 3651 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3652 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3653 3654 | 0 0 0 | (V) 3655 L = | 0 0 -1 | (P-p0) 3656 | 0 0 -1 | (p0) 3657 3658 */ 3659 for (i=0;i<reuse_solver->benign_n;i++) { 3660 const PetscScalar *vals; 3661 const PetscInt *idxs,*idxs_zero; 3662 PetscInt n,j,nz; 3663 3664 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3665 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3666 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3667 for (j=0;j<n;j++) { 3668 PetscScalar val = vals[j]; 3669 PetscInt k,col = idxs[j]; 3670 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3671 } 3672 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3673 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3674 } 3675 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3676 } 3677 if (F) { 3678 /* need to correct the rhs */ 3679 if (need_benign_correction) { 3680 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3681 PetscScalar *marr; 3682 3683 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3684 if (lda_rhs != n_R) { 3685 for (i=0;i<n_vertices;i++) { 3686 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3687 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3688 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3689 } 3690 } else { 3691 for (i=0;i<n_vertices;i++) { 3692 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3693 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3694 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3695 } 3696 } 3697 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3698 } 3699 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3700 /* need to correct the solution */ 3701 if (need_benign_correction) { 3702 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3703 PetscScalar *marr; 3704 3705 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3706 if (lda_rhs != n_R) { 3707 for (i=0;i<n_vertices;i++) { 3708 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3709 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3710 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3711 } 3712 } else { 3713 for (i=0;i<n_vertices;i++) { 3714 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3715 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3716 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3717 } 3718 } 3719 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3720 } 3721 } else { 3722 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3723 for (i=0;i<n_vertices;i++) { 3724 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3725 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3726 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3727 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3728 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3729 } 3730 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3731 } 3732 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3733 /* S_VV and S_CV */ 3734 if (n_constraints) { 3735 Mat B; 3736 3737 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3738 for (i=0;i<n_vertices;i++) { 3739 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3740 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3741 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3742 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3743 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3744 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3745 } 3746 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3747 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3748 ierr = MatDestroy(&B);CHKERRQ(ierr); 3749 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3750 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3751 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3752 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3753 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3754 ierr = MatDestroy(&B);CHKERRQ(ierr); 3755 } 3756 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3757 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3758 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3759 } 3760 if (lda_rhs != n_R) { 3761 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3762 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3763 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3764 } 3765 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3766 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3767 if (need_benign_correction) { 3768 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3769 PetscScalar *marr,*sums; 3770 3771 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3772 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 3773 for (i=0;i<reuse_solver->benign_n;i++) { 3774 const PetscScalar *vals; 3775 const PetscInt *idxs,*idxs_zero; 3776 PetscInt n,j,nz; 3777 3778 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3779 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3780 for (j=0;j<n_vertices;j++) { 3781 PetscInt k; 3782 sums[j] = 0.; 3783 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3784 } 3785 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3786 for (j=0;j<n;j++) { 3787 PetscScalar val = vals[j]; 3788 PetscInt k; 3789 for (k=0;k<n_vertices;k++) { 3790 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3791 } 3792 } 3793 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3794 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3795 } 3796 ierr = PetscFree(sums);CHKERRQ(ierr); 3797 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 3798 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3799 } 3800 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3801 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3802 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3803 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3804 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3805 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3806 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3807 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3808 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3809 } else { 3810 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3811 } 3812 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3813 3814 /* coarse basis functions */ 3815 for (i=0;i<n_vertices;i++) { 3816 PetscScalar *y; 3817 3818 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3819 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3820 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3821 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3822 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3823 y[n_B*i+idx_V_B[i]] = 1.0; 3824 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3825 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3826 3827 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3828 PetscInt j; 3829 3830 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3831 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3832 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3833 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3834 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3835 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3836 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3837 } 3838 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3839 } 3840 /* if n_R == 0 the object is not destroyed */ 3841 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3842 } 3843 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3844 3845 if (n_constraints) { 3846 Mat B; 3847 3848 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3849 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3850 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3851 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3852 if (n_vertices) { 3853 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3854 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3855 } else { 3856 Mat S_VCt; 3857 3858 if (lda_rhs != n_R) { 3859 ierr = MatDestroy(&B);CHKERRQ(ierr); 3860 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3861 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3862 } 3863 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3864 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3865 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3866 } 3867 } 3868 ierr = MatDestroy(&B);CHKERRQ(ierr); 3869 /* coarse basis functions */ 3870 for (i=0;i<n_constraints;i++) { 3871 PetscScalar *y; 3872 3873 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3874 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3875 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3876 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3877 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3878 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3879 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3880 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3881 PetscInt j; 3882 3883 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3884 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3885 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3886 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3887 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3888 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3889 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3890 } 3891 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3892 } 3893 } 3894 if (n_constraints) { 3895 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3896 } 3897 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3898 3899 /* coarse matrix entries relative to B_0 */ 3900 if (pcbddc->benign_n) { 3901 Mat B0_B,B0_BPHI; 3902 IS is_dummy; 3903 PetscScalar *data; 3904 PetscInt j; 3905 3906 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3907 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3908 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3909 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3910 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3911 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3912 for (j=0;j<pcbddc->benign_n;j++) { 3913 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3914 for (i=0;i<pcbddc->local_primal_size;i++) { 3915 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3916 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3917 } 3918 } 3919 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3920 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3921 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3922 } 3923 3924 /* compute other basis functions for non-symmetric problems */ 3925 if (!pcbddc->symmetric_primal) { 3926 Mat B_V=NULL,B_C=NULL; 3927 PetscScalar *marray; 3928 3929 if (n_constraints) { 3930 Mat S_CCT,C_CRT; 3931 3932 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 3933 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 3934 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 3935 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 3936 if (n_vertices) { 3937 Mat S_VCT; 3938 3939 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 3940 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 3941 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 3942 } 3943 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 3944 } else { 3945 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 3946 } 3947 if (n_vertices && n_R) { 3948 PetscScalar *av,*marray; 3949 const PetscInt *xadj,*adjncy; 3950 PetscInt n; 3951 PetscBool flg_row; 3952 3953 /* B_V = B_V - A_VR^T */ 3954 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3955 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3956 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 3957 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3958 for (i=0;i<n;i++) { 3959 PetscInt j; 3960 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 3961 } 3962 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3963 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3964 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3965 } 3966 3967 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 3968 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3969 for (i=0;i<n_vertices;i++) { 3970 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 3971 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3972 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3973 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3974 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3975 } 3976 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3977 if (B_C) { 3978 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 3979 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 3980 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 3981 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3982 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3983 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3984 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3985 } 3986 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 3987 } 3988 /* coarse basis functions */ 3989 for (i=0;i<pcbddc->local_primal_size;i++) { 3990 PetscScalar *y; 3991 3992 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 3993 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3994 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3995 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3996 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3997 if (i<n_vertices) { 3998 y[n_B*i+idx_V_B[i]] = 1.0; 3999 } 4000 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4001 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4002 4003 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4004 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4005 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4006 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4007 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4008 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4009 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4010 } 4011 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4012 } 4013 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4014 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4015 } 4016 /* free memory */ 4017 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4018 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4019 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4020 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4021 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4022 ierr = PetscFree(work);CHKERRQ(ierr); 4023 if (n_vertices) { 4024 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4025 } 4026 if (n_constraints) { 4027 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4028 } 4029 /* Checking coarse_sub_mat and coarse basis functios */ 4030 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4031 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4032 if (pcbddc->dbg_flag) { 4033 Mat coarse_sub_mat; 4034 Mat AUXMAT,TM1,TM2,TM3,TM4; 4035 Mat coarse_phi_D,coarse_phi_B; 4036 Mat coarse_psi_D,coarse_psi_B; 4037 Mat A_II,A_BB,A_IB,A_BI; 4038 Mat C_B,CPHI; 4039 IS is_dummy; 4040 Vec mones; 4041 MatType checkmattype=MATSEQAIJ; 4042 PetscReal real_value; 4043 4044 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4045 Mat A; 4046 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4047 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4048 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4049 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4050 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4051 ierr = MatDestroy(&A);CHKERRQ(ierr); 4052 } else { 4053 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4054 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4055 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4056 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4057 } 4058 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4059 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4060 if (!pcbddc->symmetric_primal) { 4061 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4062 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4063 } 4064 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4065 4066 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4067 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4068 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4069 if (!pcbddc->symmetric_primal) { 4070 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4071 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4072 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4073 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4074 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4075 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4076 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4077 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4078 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4079 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4080 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4081 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4082 } else { 4083 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4084 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4085 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4086 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4087 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4088 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4089 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4090 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4091 } 4092 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4093 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4094 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4095 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4096 if (pcbddc->benign_n) { 4097 Mat B0_B,B0_BPHI; 4098 PetscScalar *data,*data2; 4099 PetscInt j; 4100 4101 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4102 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4103 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4104 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4105 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4106 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4107 for (j=0;j<pcbddc->benign_n;j++) { 4108 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4109 for (i=0;i<pcbddc->local_primal_size;i++) { 4110 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4111 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4112 } 4113 } 4114 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4115 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4116 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4117 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4118 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4119 } 4120 #if 0 4121 { 4122 PetscViewer viewer; 4123 char filename[256]; 4124 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4125 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4126 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4127 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4128 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4129 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4130 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4131 if (save_change) { 4132 Mat phi_B; 4133 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 4134 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 4135 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 4136 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 4137 } else { 4138 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4139 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4140 } 4141 if (pcbddc->coarse_phi_D) { 4142 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4143 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4144 } 4145 if (pcbddc->coarse_psi_B) { 4146 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4147 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4148 } 4149 if (pcbddc->coarse_psi_D) { 4150 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4151 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4152 } 4153 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4154 } 4155 #endif 4156 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4157 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4158 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4159 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4160 4161 /* check constraints */ 4162 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4163 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4164 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4165 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4166 } else { 4167 PetscScalar *data; 4168 Mat tmat; 4169 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4170 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4171 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4172 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4173 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4174 } 4175 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4176 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4177 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4178 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4179 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4180 if (!pcbddc->symmetric_primal) { 4181 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4182 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4183 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4184 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4185 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4186 } 4187 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4188 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4189 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4190 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4191 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4192 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4193 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4194 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4195 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4196 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4197 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4198 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4199 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4200 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4201 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4202 if (!pcbddc->symmetric_primal) { 4203 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4204 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4205 } 4206 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4207 } 4208 /* get back data */ 4209 *coarse_submat_vals_n = coarse_submat_vals; 4210 PetscFunctionReturn(0); 4211 } 4212 4213 #undef __FUNCT__ 4214 #define __FUNCT__ "MatGetSubMatrixUnsorted" 4215 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4216 { 4217 Mat *work_mat; 4218 IS isrow_s,iscol_s; 4219 PetscBool rsorted,csorted; 4220 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4221 PetscErrorCode ierr; 4222 4223 PetscFunctionBegin; 4224 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4225 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4226 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4227 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4228 4229 if (!rsorted) { 4230 const PetscInt *idxs; 4231 PetscInt *idxs_sorted,i; 4232 4233 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4234 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4235 for (i=0;i<rsize;i++) { 4236 idxs_perm_r[i] = i; 4237 } 4238 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4239 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4240 for (i=0;i<rsize;i++) { 4241 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4242 } 4243 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4244 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4245 } else { 4246 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4247 isrow_s = isrow; 4248 } 4249 4250 if (!csorted) { 4251 if (isrow == iscol) { 4252 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4253 iscol_s = isrow_s; 4254 } else { 4255 const PetscInt *idxs; 4256 PetscInt *idxs_sorted,i; 4257 4258 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4259 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4260 for (i=0;i<csize;i++) { 4261 idxs_perm_c[i] = i; 4262 } 4263 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4264 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4265 for (i=0;i<csize;i++) { 4266 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4267 } 4268 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4269 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4270 } 4271 } else { 4272 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4273 iscol_s = iscol; 4274 } 4275 4276 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4277 4278 if (!rsorted || !csorted) { 4279 Mat new_mat; 4280 IS is_perm_r,is_perm_c; 4281 4282 if (!rsorted) { 4283 PetscInt *idxs_r,i; 4284 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4285 for (i=0;i<rsize;i++) { 4286 idxs_r[idxs_perm_r[i]] = i; 4287 } 4288 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4289 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4290 } else { 4291 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4292 } 4293 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4294 4295 if (!csorted) { 4296 if (isrow_s == iscol_s) { 4297 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4298 is_perm_c = is_perm_r; 4299 } else { 4300 PetscInt *idxs_c,i; 4301 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4302 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4303 for (i=0;i<csize;i++) { 4304 idxs_c[idxs_perm_c[i]] = i; 4305 } 4306 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4307 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4308 } 4309 } else { 4310 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4311 } 4312 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4313 4314 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4315 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4316 work_mat[0] = new_mat; 4317 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4318 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4319 } 4320 4321 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4322 *B = work_mat[0]; 4323 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4324 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4325 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4326 PetscFunctionReturn(0); 4327 } 4328 4329 #undef __FUNCT__ 4330 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 4331 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4332 { 4333 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4334 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4335 Mat new_mat; 4336 IS is_local,is_global; 4337 PetscInt local_size; 4338 PetscBool isseqaij; 4339 PetscErrorCode ierr; 4340 4341 PetscFunctionBegin; 4342 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4343 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4344 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4345 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4346 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4347 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4348 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4349 4350 /* check */ 4351 if (pcbddc->dbg_flag) { 4352 Vec x,x_change; 4353 PetscReal error; 4354 4355 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4356 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4357 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4358 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4359 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4360 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4361 if (!pcbddc->change_interior) { 4362 const PetscScalar *x,*y,*v; 4363 PetscReal lerror = 0.; 4364 PetscInt i; 4365 4366 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4367 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4368 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4369 for (i=0;i<local_size;i++) 4370 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4371 lerror = PetscAbsScalar(x[i]-y[i]); 4372 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4373 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4374 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4375 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4376 if (error > PETSC_SMALL) { 4377 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4378 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4379 } else { 4380 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4381 } 4382 } 4383 } 4384 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4385 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4386 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4387 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4388 if (error > PETSC_SMALL) { 4389 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4390 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4391 } else { 4392 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4393 } 4394 } 4395 ierr = VecDestroy(&x);CHKERRQ(ierr); 4396 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4397 } 4398 4399 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4400 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4401 if (isseqaij) { 4402 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4403 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4404 } else { 4405 Mat work_mat; 4406 4407 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4408 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4409 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4410 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4411 } 4412 if (matis->A->symmetric_set) { 4413 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4414 #if !defined(PETSC_USE_COMPLEX) 4415 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4416 #endif 4417 } 4418 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4419 PetscFunctionReturn(0); 4420 } 4421 4422 #undef __FUNCT__ 4423 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 4424 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4425 { 4426 PC_IS* pcis = (PC_IS*)(pc->data); 4427 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4428 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4429 PetscInt *idx_R_local=NULL; 4430 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4431 PetscInt vbs,bs; 4432 PetscBT bitmask=NULL; 4433 PetscErrorCode ierr; 4434 4435 PetscFunctionBegin; 4436 /* 4437 No need to setup local scatters if 4438 - primal space is unchanged 4439 AND 4440 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4441 AND 4442 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4443 */ 4444 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4445 PetscFunctionReturn(0); 4446 } 4447 /* destroy old objects */ 4448 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4449 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4450 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4451 /* Set Non-overlapping dimensions */ 4452 n_B = pcis->n_B; 4453 n_D = pcis->n - n_B; 4454 n_vertices = pcbddc->n_vertices; 4455 4456 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4457 4458 /* create auxiliary bitmask and allocate workspace */ 4459 if (!sub_schurs || !sub_schurs->reuse_solver) { 4460 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4461 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4462 for (i=0;i<n_vertices;i++) { 4463 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4464 } 4465 4466 for (i=0, n_R=0; i<pcis->n; i++) { 4467 if (!PetscBTLookup(bitmask,i)) { 4468 idx_R_local[n_R++] = i; 4469 } 4470 } 4471 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4472 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4473 4474 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4475 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4476 } 4477 4478 /* Block code */ 4479 vbs = 1; 4480 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4481 if (bs>1 && !(n_vertices%bs)) { 4482 PetscBool is_blocked = PETSC_TRUE; 4483 PetscInt *vary; 4484 if (!sub_schurs || !sub_schurs->reuse_solver) { 4485 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4486 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4487 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4488 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4489 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4490 for (i=0; i<pcis->n/bs; i++) { 4491 if (vary[i]!=0 && vary[i]!=bs) { 4492 is_blocked = PETSC_FALSE; 4493 break; 4494 } 4495 } 4496 ierr = PetscFree(vary);CHKERRQ(ierr); 4497 } else { 4498 /* Verify directly the R set */ 4499 for (i=0; i<n_R/bs; i++) { 4500 PetscInt j,node=idx_R_local[bs*i]; 4501 for (j=1; j<bs; j++) { 4502 if (node != idx_R_local[bs*i+j]-j) { 4503 is_blocked = PETSC_FALSE; 4504 break; 4505 } 4506 } 4507 } 4508 } 4509 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4510 vbs = bs; 4511 for (i=0;i<n_R/vbs;i++) { 4512 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4513 } 4514 } 4515 } 4516 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4517 if (sub_schurs && sub_schurs->reuse_solver) { 4518 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4519 4520 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4521 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4522 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4523 reuse_solver->is_R = pcbddc->is_R_local; 4524 } else { 4525 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4526 } 4527 4528 /* print some info if requested */ 4529 if (pcbddc->dbg_flag) { 4530 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4531 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4532 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4533 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4534 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4535 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4536 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4537 } 4538 4539 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4540 if (!sub_schurs || !sub_schurs->reuse_solver) { 4541 IS is_aux1,is_aux2; 4542 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4543 4544 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4545 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4546 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4547 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4548 for (i=0; i<n_D; i++) { 4549 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4550 } 4551 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4552 for (i=0, j=0; i<n_R; i++) { 4553 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4554 aux_array1[j++] = i; 4555 } 4556 } 4557 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4558 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4559 for (i=0, j=0; i<n_B; i++) { 4560 if (!PetscBTLookup(bitmask,is_indices[i])) { 4561 aux_array2[j++] = i; 4562 } 4563 } 4564 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4565 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4566 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4567 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4568 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4569 4570 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4571 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4572 for (i=0, j=0; i<n_R; i++) { 4573 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4574 aux_array1[j++] = i; 4575 } 4576 } 4577 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4578 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4579 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4580 } 4581 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4582 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4583 } else { 4584 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4585 IS tis; 4586 PetscInt schur_size; 4587 4588 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4589 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4590 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4591 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4592 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4593 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4594 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4595 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4596 } 4597 } 4598 PetscFunctionReturn(0); 4599 } 4600 4601 4602 #undef __FUNCT__ 4603 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 4604 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4605 { 4606 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4607 PC_IS *pcis = (PC_IS*)pc->data; 4608 PC pc_temp; 4609 Mat A_RR; 4610 MatReuse reuse; 4611 PetscScalar m_one = -1.0; 4612 PetscReal value; 4613 PetscInt n_D,n_R; 4614 PetscBool check_corr[2],issbaij; 4615 PetscErrorCode ierr; 4616 /* prefixes stuff */ 4617 char dir_prefix[256],neu_prefix[256],str_level[16]; 4618 size_t len; 4619 4620 PetscFunctionBegin; 4621 4622 /* compute prefixes */ 4623 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4624 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4625 if (!pcbddc->current_level) { 4626 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4627 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4628 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4629 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4630 } else { 4631 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4632 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4633 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4634 len -= 15; /* remove "pc_bddc_coarse_" */ 4635 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4636 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4637 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4638 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4639 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4640 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4641 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4642 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4643 } 4644 4645 /* DIRICHLET PROBLEM */ 4646 if (dirichlet) { 4647 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4648 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4649 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4650 if (pcbddc->dbg_flag) { 4651 Mat A_IIn; 4652 4653 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4654 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4655 pcis->A_II = A_IIn; 4656 } 4657 } 4658 if (pcbddc->local_mat->symmetric_set) { 4659 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4660 } 4661 /* Matrix for Dirichlet problem is pcis->A_II */ 4662 n_D = pcis->n - pcis->n_B; 4663 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4664 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4665 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4666 /* default */ 4667 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4668 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4669 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4670 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4671 if (issbaij) { 4672 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4673 } else { 4674 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4675 } 4676 /* Allow user's customization */ 4677 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4678 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4679 } 4680 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4681 if (sub_schurs && sub_schurs->reuse_solver) { 4682 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4683 4684 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4685 } 4686 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4687 if (!n_D) { 4688 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4689 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4690 } 4691 /* Set Up KSP for Dirichlet problem of BDDC */ 4692 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4693 /* set ksp_D into pcis data */ 4694 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4695 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4696 pcis->ksp_D = pcbddc->ksp_D; 4697 } 4698 4699 /* NEUMANN PROBLEM */ 4700 A_RR = 0; 4701 if (neumann) { 4702 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4703 PetscInt ibs,mbs; 4704 PetscBool issbaij; 4705 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4706 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4707 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4708 if (pcbddc->ksp_R) { /* already created ksp */ 4709 PetscInt nn_R; 4710 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4711 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4712 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4713 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4714 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4715 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4716 reuse = MAT_INITIAL_MATRIX; 4717 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4718 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4719 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4720 reuse = MAT_INITIAL_MATRIX; 4721 } else { /* safe to reuse the matrix */ 4722 reuse = MAT_REUSE_MATRIX; 4723 } 4724 } 4725 /* last check */ 4726 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4727 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4728 reuse = MAT_INITIAL_MATRIX; 4729 } 4730 } else { /* first time, so we need to create the matrix */ 4731 reuse = MAT_INITIAL_MATRIX; 4732 } 4733 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4734 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4735 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4736 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4737 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4738 if (matis->A == pcbddc->local_mat) { 4739 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4740 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4741 } else { 4742 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4743 } 4744 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4745 if (matis->A == pcbddc->local_mat) { 4746 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4747 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4748 } else { 4749 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4750 } 4751 } 4752 /* extract A_RR */ 4753 if (sub_schurs && sub_schurs->reuse_solver) { 4754 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4755 4756 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4757 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4758 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4759 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4760 } else { 4761 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4762 } 4763 } else { 4764 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4765 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4766 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4767 } 4768 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4769 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4770 } 4771 if (pcbddc->local_mat->symmetric_set) { 4772 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4773 } 4774 if (!pcbddc->ksp_R) { /* create object if not present */ 4775 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4776 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4777 /* default */ 4778 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4779 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4780 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4781 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4782 if (issbaij) { 4783 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4784 } else { 4785 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4786 } 4787 /* Allow user's customization */ 4788 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4789 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4790 } 4791 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4792 if (!n_R) { 4793 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4794 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4795 } 4796 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4797 /* Reuse solver if it is present */ 4798 if (sub_schurs && sub_schurs->reuse_solver) { 4799 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4800 4801 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4802 } 4803 /* Set Up KSP for Neumann problem of BDDC */ 4804 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4805 } 4806 4807 if (pcbddc->dbg_flag) { 4808 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4809 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4810 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4811 } 4812 4813 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4814 check_corr[0] = check_corr[1] = PETSC_FALSE; 4815 if (pcbddc->NullSpace_corr[0]) { 4816 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4817 } 4818 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4819 check_corr[0] = PETSC_TRUE; 4820 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4821 } 4822 if (neumann && pcbddc->NullSpace_corr[2]) { 4823 check_corr[1] = PETSC_TRUE; 4824 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4825 } 4826 4827 /* check Dirichlet and Neumann solvers */ 4828 if (pcbddc->dbg_flag) { 4829 if (dirichlet) { /* Dirichlet */ 4830 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4831 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4832 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4833 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4834 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4835 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4836 if (check_corr[0]) { 4837 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4838 } 4839 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4840 } 4841 if (neumann) { /* Neumann */ 4842 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4843 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4844 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4845 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4846 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4847 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4848 if (check_corr[1]) { 4849 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4850 } 4851 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4852 } 4853 } 4854 /* free Neumann problem's matrix */ 4855 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4856 PetscFunctionReturn(0); 4857 } 4858 4859 #undef __FUNCT__ 4860 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 4861 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4862 { 4863 PetscErrorCode ierr; 4864 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4865 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4866 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4867 4868 PetscFunctionBegin; 4869 if (!reuse_solver) { 4870 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4871 } 4872 if (!pcbddc->switch_static) { 4873 if (applytranspose && pcbddc->local_auxmat1) { 4874 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4875 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4876 } 4877 if (!reuse_solver) { 4878 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4879 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4880 } else { 4881 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4882 4883 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4884 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4885 } 4886 } else { 4887 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4888 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4889 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4890 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4891 if (applytranspose && pcbddc->local_auxmat1) { 4892 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4893 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4894 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4895 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4896 } 4897 } 4898 if (!reuse_solver || pcbddc->switch_static) { 4899 if (applytranspose) { 4900 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4901 } else { 4902 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4903 } 4904 } else { 4905 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4906 4907 if (applytranspose) { 4908 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4909 } else { 4910 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4911 } 4912 } 4913 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4914 if (!pcbddc->switch_static) { 4915 if (!reuse_solver) { 4916 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4917 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4918 } else { 4919 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4920 4921 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4922 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4923 } 4924 if (!applytranspose && pcbddc->local_auxmat1) { 4925 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4926 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4927 } 4928 } else { 4929 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4930 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4931 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4932 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4933 if (!applytranspose && pcbddc->local_auxmat1) { 4934 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4935 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4936 } 4937 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4938 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4939 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4940 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4941 } 4942 PetscFunctionReturn(0); 4943 } 4944 4945 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 4946 #undef __FUNCT__ 4947 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 4948 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 4949 { 4950 PetscErrorCode ierr; 4951 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4952 PC_IS* pcis = (PC_IS*) (pc->data); 4953 const PetscScalar zero = 0.0; 4954 4955 PetscFunctionBegin; 4956 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 4957 if (!pcbddc->benign_apply_coarse_only) { 4958 if (applytranspose) { 4959 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4960 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4961 } else { 4962 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4963 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4964 } 4965 } else { 4966 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 4967 } 4968 4969 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 4970 if (pcbddc->benign_n) { 4971 PetscScalar *array; 4972 PetscInt j; 4973 4974 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4975 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 4976 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4977 } 4978 4979 /* start communications from local primal nodes to rhs of coarse solver */ 4980 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 4981 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4982 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4983 4984 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 4985 if (pcbddc->coarse_ksp) { 4986 Mat coarse_mat; 4987 Vec rhs,sol; 4988 MatNullSpace nullsp; 4989 PetscBool isbddc = PETSC_FALSE; 4990 4991 if (pcbddc->benign_have_null) { 4992 PC coarse_pc; 4993 4994 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4995 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4996 /* we need to propagate to coarser levels the need for a possible benign correction */ 4997 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 4998 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4999 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5000 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5001 } 5002 } 5003 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5004 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5005 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5006 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5007 if (nullsp) { 5008 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5009 } 5010 if (applytranspose) { 5011 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5012 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5013 } else { 5014 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5015 PC coarse_pc; 5016 5017 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5018 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5019 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5020 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5021 } else { 5022 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5023 } 5024 } 5025 /* we don't need the benign correction at coarser levels anymore */ 5026 if (pcbddc->benign_have_null && isbddc) { 5027 PC coarse_pc; 5028 PC_BDDC* coarsepcbddc; 5029 5030 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5031 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5032 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5033 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5034 } 5035 if (nullsp) { 5036 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5037 } 5038 } 5039 5040 /* Local solution on R nodes */ 5041 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5042 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5043 } 5044 /* communications from coarse sol to local primal nodes */ 5045 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5046 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5047 5048 /* Sum contributions from the two levels */ 5049 if (!pcbddc->benign_apply_coarse_only) { 5050 if (applytranspose) { 5051 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5052 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5053 } else { 5054 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5055 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5056 } 5057 /* store p0 */ 5058 if (pcbddc->benign_n) { 5059 PetscScalar *array; 5060 PetscInt j; 5061 5062 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5063 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5064 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5065 } 5066 } else { /* expand the coarse solution */ 5067 if (applytranspose) { 5068 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5069 } else { 5070 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5071 } 5072 } 5073 PetscFunctionReturn(0); 5074 } 5075 5076 #undef __FUNCT__ 5077 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 5078 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5079 { 5080 PetscErrorCode ierr; 5081 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5082 PetscScalar *array; 5083 Vec from,to; 5084 5085 PetscFunctionBegin; 5086 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5087 from = pcbddc->coarse_vec; 5088 to = pcbddc->vec1_P; 5089 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5090 Vec tvec; 5091 5092 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5093 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5094 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5095 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5096 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5097 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5098 } 5099 } else { /* from local to global -> put data in coarse right hand side */ 5100 from = pcbddc->vec1_P; 5101 to = pcbddc->coarse_vec; 5102 } 5103 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5104 PetscFunctionReturn(0); 5105 } 5106 5107 #undef __FUNCT__ 5108 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 5109 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5110 { 5111 PetscErrorCode ierr; 5112 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5113 PetscScalar *array; 5114 Vec from,to; 5115 5116 PetscFunctionBegin; 5117 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5118 from = pcbddc->coarse_vec; 5119 to = pcbddc->vec1_P; 5120 } else { /* from local to global -> put data in coarse right hand side */ 5121 from = pcbddc->vec1_P; 5122 to = pcbddc->coarse_vec; 5123 } 5124 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5125 if (smode == SCATTER_FORWARD) { 5126 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5127 Vec tvec; 5128 5129 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5130 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5131 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5132 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5133 } 5134 } else { 5135 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5136 ierr = VecResetArray(from);CHKERRQ(ierr); 5137 } 5138 } 5139 PetscFunctionReturn(0); 5140 } 5141 5142 /* uncomment for testing purposes */ 5143 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5144 #undef __FUNCT__ 5145 #define __FUNCT__ "PCBDDCConstraintsSetUp" 5146 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5147 { 5148 PetscErrorCode ierr; 5149 PC_IS* pcis = (PC_IS*)(pc->data); 5150 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5151 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5152 /* one and zero */ 5153 PetscScalar one=1.0,zero=0.0; 5154 /* space to store constraints and their local indices */ 5155 PetscScalar *constraints_data; 5156 PetscInt *constraints_idxs,*constraints_idxs_B; 5157 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5158 PetscInt *constraints_n; 5159 /* iterators */ 5160 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5161 /* BLAS integers */ 5162 PetscBLASInt lwork,lierr; 5163 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5164 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5165 /* reuse */ 5166 PetscInt olocal_primal_size,olocal_primal_size_cc; 5167 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5168 /* change of basis */ 5169 PetscBool qr_needed; 5170 PetscBT change_basis,qr_needed_idx; 5171 /* auxiliary stuff */ 5172 PetscInt *nnz,*is_indices; 5173 PetscInt ncc; 5174 /* some quantities */ 5175 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5176 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5177 5178 PetscFunctionBegin; 5179 /* Destroy Mat objects computed previously */ 5180 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5181 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5182 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5183 /* save info on constraints from previous setup (if any) */ 5184 olocal_primal_size = pcbddc->local_primal_size; 5185 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5186 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5187 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5188 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5189 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5190 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5191 5192 if (!pcbddc->adaptive_selection) { 5193 IS ISForVertices,*ISForFaces,*ISForEdges; 5194 MatNullSpace nearnullsp; 5195 const Vec *nearnullvecs; 5196 Vec *localnearnullsp; 5197 PetscScalar *array; 5198 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5199 PetscBool nnsp_has_cnst; 5200 /* LAPACK working arrays for SVD or POD */ 5201 PetscBool skip_lapack,boolforchange; 5202 PetscScalar *work; 5203 PetscReal *singular_vals; 5204 #if defined(PETSC_USE_COMPLEX) 5205 PetscReal *rwork; 5206 #endif 5207 #if defined(PETSC_MISSING_LAPACK_GESVD) 5208 PetscScalar *temp_basis,*correlation_mat; 5209 #else 5210 PetscBLASInt dummy_int=1; 5211 PetscScalar dummy_scalar=1.; 5212 #endif 5213 5214 /* Get index sets for faces, edges and vertices from graph */ 5215 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5216 /* print some info */ 5217 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5218 PetscInt nv; 5219 5220 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5221 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5222 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5223 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5224 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5225 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5226 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5227 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5228 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5229 } 5230 5231 /* free unneeded index sets */ 5232 if (!pcbddc->use_vertices) { 5233 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5234 } 5235 if (!pcbddc->use_edges) { 5236 for (i=0;i<n_ISForEdges;i++) { 5237 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5238 } 5239 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5240 n_ISForEdges = 0; 5241 } 5242 if (!pcbddc->use_faces) { 5243 for (i=0;i<n_ISForFaces;i++) { 5244 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5245 } 5246 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5247 n_ISForFaces = 0; 5248 } 5249 5250 /* check if near null space is attached to global mat */ 5251 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5252 if (nearnullsp) { 5253 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5254 /* remove any stored info */ 5255 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5256 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5257 /* store information for BDDC solver reuse */ 5258 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5259 pcbddc->onearnullspace = nearnullsp; 5260 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5261 for (i=0;i<nnsp_size;i++) { 5262 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5263 } 5264 } else { /* if near null space is not provided BDDC uses constants by default */ 5265 nnsp_size = 0; 5266 nnsp_has_cnst = PETSC_TRUE; 5267 } 5268 /* get max number of constraints on a single cc */ 5269 max_constraints = nnsp_size; 5270 if (nnsp_has_cnst) max_constraints++; 5271 5272 /* 5273 Evaluate maximum storage size needed by the procedure 5274 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5275 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5276 There can be multiple constraints per connected component 5277 */ 5278 n_vertices = 0; 5279 if (ISForVertices) { 5280 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5281 } 5282 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5283 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5284 5285 total_counts = n_ISForFaces+n_ISForEdges; 5286 total_counts *= max_constraints; 5287 total_counts += n_vertices; 5288 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5289 5290 total_counts = 0; 5291 max_size_of_constraint = 0; 5292 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5293 IS used_is; 5294 if (i<n_ISForEdges) { 5295 used_is = ISForEdges[i]; 5296 } else { 5297 used_is = ISForFaces[i-n_ISForEdges]; 5298 } 5299 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5300 total_counts += j; 5301 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5302 } 5303 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5304 5305 /* get local part of global near null space vectors */ 5306 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5307 for (k=0;k<nnsp_size;k++) { 5308 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5309 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5310 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5311 } 5312 5313 /* whether or not to skip lapack calls */ 5314 skip_lapack = PETSC_TRUE; 5315 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5316 5317 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5318 if (!skip_lapack) { 5319 PetscScalar temp_work; 5320 5321 #if defined(PETSC_MISSING_LAPACK_GESVD) 5322 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5323 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5324 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5325 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5326 #if defined(PETSC_USE_COMPLEX) 5327 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5328 #endif 5329 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5330 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5331 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5332 lwork = -1; 5333 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5334 #if !defined(PETSC_USE_COMPLEX) 5335 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5336 #else 5337 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5338 #endif 5339 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5340 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5341 #else /* on missing GESVD */ 5342 /* SVD */ 5343 PetscInt max_n,min_n; 5344 max_n = max_size_of_constraint; 5345 min_n = max_constraints; 5346 if (max_size_of_constraint < max_constraints) { 5347 min_n = max_size_of_constraint; 5348 max_n = max_constraints; 5349 } 5350 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5351 #if defined(PETSC_USE_COMPLEX) 5352 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5353 #endif 5354 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5355 lwork = -1; 5356 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5357 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5358 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5359 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5360 #if !defined(PETSC_USE_COMPLEX) 5361 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5362 #else 5363 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5364 #endif 5365 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5366 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5367 #endif /* on missing GESVD */ 5368 /* Allocate optimal workspace */ 5369 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5370 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5371 } 5372 /* Now we can loop on constraining sets */ 5373 total_counts = 0; 5374 constraints_idxs_ptr[0] = 0; 5375 constraints_data_ptr[0] = 0; 5376 /* vertices */ 5377 if (n_vertices) { 5378 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5379 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5380 for (i=0;i<n_vertices;i++) { 5381 constraints_n[total_counts] = 1; 5382 constraints_data[total_counts] = 1.0; 5383 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5384 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5385 total_counts++; 5386 } 5387 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5388 n_vertices = total_counts; 5389 } 5390 5391 /* edges and faces */ 5392 total_counts_cc = total_counts; 5393 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5394 IS used_is; 5395 PetscBool idxs_copied = PETSC_FALSE; 5396 5397 if (ncc<n_ISForEdges) { 5398 used_is = ISForEdges[ncc]; 5399 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5400 } else { 5401 used_is = ISForFaces[ncc-n_ISForEdges]; 5402 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5403 } 5404 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5405 5406 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5407 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5408 /* change of basis should not be performed on local periodic nodes */ 5409 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5410 if (nnsp_has_cnst) { 5411 PetscScalar quad_value; 5412 5413 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5414 idxs_copied = PETSC_TRUE; 5415 5416 if (!pcbddc->use_nnsp_true) { 5417 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5418 } else { 5419 quad_value = 1.0; 5420 } 5421 for (j=0;j<size_of_constraint;j++) { 5422 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5423 } 5424 temp_constraints++; 5425 total_counts++; 5426 } 5427 for (k=0;k<nnsp_size;k++) { 5428 PetscReal real_value; 5429 PetscScalar *ptr_to_data; 5430 5431 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5432 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5433 for (j=0;j<size_of_constraint;j++) { 5434 ptr_to_data[j] = array[is_indices[j]]; 5435 } 5436 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5437 /* check if array is null on the connected component */ 5438 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5439 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5440 if (real_value > 0.0) { /* keep indices and values */ 5441 temp_constraints++; 5442 total_counts++; 5443 if (!idxs_copied) { 5444 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5445 idxs_copied = PETSC_TRUE; 5446 } 5447 } 5448 } 5449 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5450 valid_constraints = temp_constraints; 5451 if (!pcbddc->use_nnsp_true && temp_constraints) { 5452 if (temp_constraints == 1) { /* just normalize the constraint */ 5453 PetscScalar norm,*ptr_to_data; 5454 5455 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5456 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5457 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5458 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5459 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5460 } else { /* perform SVD */ 5461 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5462 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5463 5464 #if defined(PETSC_MISSING_LAPACK_GESVD) 5465 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5466 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5467 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5468 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5469 from that computed using LAPACKgesvd 5470 -> This is due to a different computation of eigenvectors in LAPACKheev 5471 -> The quality of the POD-computed basis will be the same */ 5472 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5473 /* Store upper triangular part of correlation matrix */ 5474 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5475 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5476 for (j=0;j<temp_constraints;j++) { 5477 for (k=0;k<j+1;k++) { 5478 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5479 } 5480 } 5481 /* compute eigenvalues and eigenvectors of correlation matrix */ 5482 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5483 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5484 #if !defined(PETSC_USE_COMPLEX) 5485 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5486 #else 5487 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5488 #endif 5489 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5490 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5491 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5492 j = 0; 5493 while (j < temp_constraints && singular_vals[j] < tol) j++; 5494 total_counts = total_counts-j; 5495 valid_constraints = temp_constraints-j; 5496 /* scale and copy POD basis into used quadrature memory */ 5497 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5498 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5499 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5500 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5501 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5502 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5503 if (j<temp_constraints) { 5504 PetscInt ii; 5505 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5506 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5507 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5508 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5509 for (k=0;k<temp_constraints-j;k++) { 5510 for (ii=0;ii<size_of_constraint;ii++) { 5511 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5512 } 5513 } 5514 } 5515 #else /* on missing GESVD */ 5516 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5517 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5518 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5519 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5520 #if !defined(PETSC_USE_COMPLEX) 5521 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5522 #else 5523 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5524 #endif 5525 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5526 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5527 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5528 k = temp_constraints; 5529 if (k > size_of_constraint) k = size_of_constraint; 5530 j = 0; 5531 while (j < k && singular_vals[k-j-1] < tol) j++; 5532 valid_constraints = k-j; 5533 total_counts = total_counts-temp_constraints+valid_constraints; 5534 #endif /* on missing GESVD */ 5535 } 5536 } 5537 /* update pointers information */ 5538 if (valid_constraints) { 5539 constraints_n[total_counts_cc] = valid_constraints; 5540 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5541 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5542 /* set change_of_basis flag */ 5543 if (boolforchange) { 5544 PetscBTSet(change_basis,total_counts_cc); 5545 } 5546 total_counts_cc++; 5547 } 5548 } 5549 /* free workspace */ 5550 if (!skip_lapack) { 5551 ierr = PetscFree(work);CHKERRQ(ierr); 5552 #if defined(PETSC_USE_COMPLEX) 5553 ierr = PetscFree(rwork);CHKERRQ(ierr); 5554 #endif 5555 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5556 #if defined(PETSC_MISSING_LAPACK_GESVD) 5557 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5558 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5559 #endif 5560 } 5561 for (k=0;k<nnsp_size;k++) { 5562 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5563 } 5564 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5565 /* free index sets of faces, edges and vertices */ 5566 for (i=0;i<n_ISForFaces;i++) { 5567 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5568 } 5569 if (n_ISForFaces) { 5570 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5571 } 5572 for (i=0;i<n_ISForEdges;i++) { 5573 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5574 } 5575 if (n_ISForEdges) { 5576 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5577 } 5578 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5579 } else { 5580 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5581 5582 total_counts = 0; 5583 n_vertices = 0; 5584 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5585 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5586 } 5587 max_constraints = 0; 5588 total_counts_cc = 0; 5589 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5590 total_counts += pcbddc->adaptive_constraints_n[i]; 5591 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5592 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5593 } 5594 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5595 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5596 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5597 constraints_data = pcbddc->adaptive_constraints_data; 5598 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5599 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5600 total_counts_cc = 0; 5601 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5602 if (pcbddc->adaptive_constraints_n[i]) { 5603 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5604 } 5605 } 5606 #if 0 5607 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5608 for (i=0;i<total_counts_cc;i++) { 5609 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5610 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5611 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5612 printf(" %d",constraints_idxs[j]); 5613 } 5614 printf("\n"); 5615 printf("number of cc: %d\n",constraints_n[i]); 5616 } 5617 for (i=0;i<n_vertices;i++) { 5618 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5619 } 5620 for (i=0;i<sub_schurs->n_subs;i++) { 5621 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5622 } 5623 #endif 5624 5625 max_size_of_constraint = 0; 5626 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5627 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5628 /* Change of basis */ 5629 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5630 if (pcbddc->use_change_of_basis) { 5631 for (i=0;i<sub_schurs->n_subs;i++) { 5632 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5633 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5634 } 5635 } 5636 } 5637 } 5638 pcbddc->local_primal_size = total_counts; 5639 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5640 5641 /* map constraints_idxs in boundary numbering */ 5642 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5643 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5644 5645 /* Create constraint matrix */ 5646 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5647 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5648 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5649 5650 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5651 /* determine if a QR strategy is needed for change of basis */ 5652 qr_needed = PETSC_FALSE; 5653 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5654 total_primal_vertices=0; 5655 pcbddc->local_primal_size_cc = 0; 5656 for (i=0;i<total_counts_cc;i++) { 5657 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5658 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5659 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5660 pcbddc->local_primal_size_cc += 1; 5661 } else if (PetscBTLookup(change_basis,i)) { 5662 for (k=0;k<constraints_n[i];k++) { 5663 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5664 } 5665 pcbddc->local_primal_size_cc += constraints_n[i]; 5666 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5667 PetscBTSet(qr_needed_idx,i); 5668 qr_needed = PETSC_TRUE; 5669 } 5670 } else { 5671 pcbddc->local_primal_size_cc += 1; 5672 } 5673 } 5674 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5675 pcbddc->n_vertices = total_primal_vertices; 5676 /* permute indices in order to have a sorted set of vertices */ 5677 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5678 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5679 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5680 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5681 5682 /* nonzero structure of constraint matrix */ 5683 /* and get reference dof for local constraints */ 5684 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5685 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5686 5687 j = total_primal_vertices; 5688 total_counts = total_primal_vertices; 5689 cum = total_primal_vertices; 5690 for (i=n_vertices;i<total_counts_cc;i++) { 5691 if (!PetscBTLookup(change_basis,i)) { 5692 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5693 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5694 cum++; 5695 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5696 for (k=0;k<constraints_n[i];k++) { 5697 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5698 nnz[j+k] = size_of_constraint; 5699 } 5700 j += constraints_n[i]; 5701 } 5702 } 5703 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5704 ierr = PetscFree(nnz);CHKERRQ(ierr); 5705 5706 /* set values in constraint matrix */ 5707 for (i=0;i<total_primal_vertices;i++) { 5708 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5709 } 5710 total_counts = total_primal_vertices; 5711 for (i=n_vertices;i<total_counts_cc;i++) { 5712 if (!PetscBTLookup(change_basis,i)) { 5713 PetscInt *cols; 5714 5715 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5716 cols = constraints_idxs+constraints_idxs_ptr[i]; 5717 for (k=0;k<constraints_n[i];k++) { 5718 PetscInt row = total_counts+k; 5719 PetscScalar *vals; 5720 5721 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5722 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5723 } 5724 total_counts += constraints_n[i]; 5725 } 5726 } 5727 /* assembling */ 5728 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5729 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5730 5731 /* 5732 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5733 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5734 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5735 */ 5736 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5737 if (pcbddc->use_change_of_basis) { 5738 /* dual and primal dofs on a single cc */ 5739 PetscInt dual_dofs,primal_dofs; 5740 /* working stuff for GEQRF */ 5741 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5742 PetscBLASInt lqr_work; 5743 /* working stuff for UNGQR */ 5744 PetscScalar *gqr_work,lgqr_work_t; 5745 PetscBLASInt lgqr_work; 5746 /* working stuff for TRTRS */ 5747 PetscScalar *trs_rhs; 5748 PetscBLASInt Blas_NRHS; 5749 /* pointers for values insertion into change of basis matrix */ 5750 PetscInt *start_rows,*start_cols; 5751 PetscScalar *start_vals; 5752 /* working stuff for values insertion */ 5753 PetscBT is_primal; 5754 PetscInt *aux_primal_numbering_B; 5755 /* matrix sizes */ 5756 PetscInt global_size,local_size; 5757 /* temporary change of basis */ 5758 Mat localChangeOfBasisMatrix; 5759 /* extra space for debugging */ 5760 PetscScalar *dbg_work; 5761 5762 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5763 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5764 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5765 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5766 /* nonzeros for local mat */ 5767 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5768 if (!pcbddc->benign_change || pcbddc->fake_change) { 5769 for (i=0;i<pcis->n;i++) nnz[i]=1; 5770 } else { 5771 const PetscInt *ii; 5772 PetscInt n; 5773 PetscBool flg_row; 5774 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5775 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5776 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5777 } 5778 for (i=n_vertices;i<total_counts_cc;i++) { 5779 if (PetscBTLookup(change_basis,i)) { 5780 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5781 if (PetscBTLookup(qr_needed_idx,i)) { 5782 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5783 } else { 5784 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5785 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5786 } 5787 } 5788 } 5789 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5790 ierr = PetscFree(nnz);CHKERRQ(ierr); 5791 /* Set interior change in the matrix */ 5792 if (!pcbddc->benign_change || pcbddc->fake_change) { 5793 for (i=0;i<pcis->n;i++) { 5794 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5795 } 5796 } else { 5797 const PetscInt *ii,*jj; 5798 PetscScalar *aa; 5799 PetscInt n; 5800 PetscBool flg_row; 5801 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5802 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5803 for (i=0;i<n;i++) { 5804 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5805 } 5806 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5807 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5808 } 5809 5810 if (pcbddc->dbg_flag) { 5811 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5812 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5813 } 5814 5815 5816 /* Now we loop on the constraints which need a change of basis */ 5817 /* 5818 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5819 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5820 5821 Basic blocks of change of basis matrix T computed by 5822 5823 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5824 5825 | 1 0 ... 0 s_1/S | 5826 | 0 1 ... 0 s_2/S | 5827 | ... | 5828 | 0 ... 1 s_{n-1}/S | 5829 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5830 5831 with S = \sum_{i=1}^n s_i^2 5832 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5833 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5834 5835 - QR decomposition of constraints otherwise 5836 */ 5837 if (qr_needed) { 5838 /* space to store Q */ 5839 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5840 /* array to store scaling factors for reflectors */ 5841 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5842 /* first we issue queries for optimal work */ 5843 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5844 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5845 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5846 lqr_work = -1; 5847 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5848 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5849 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5850 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5851 lgqr_work = -1; 5852 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5853 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5854 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5855 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5856 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5857 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5858 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5859 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5860 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5861 /* array to store rhs and solution of triangular solver */ 5862 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5863 /* allocating workspace for check */ 5864 if (pcbddc->dbg_flag) { 5865 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5866 } 5867 } 5868 /* array to store whether a node is primal or not */ 5869 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5870 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5871 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5872 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5873 for (i=0;i<total_primal_vertices;i++) { 5874 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5875 } 5876 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5877 5878 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5879 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5880 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5881 if (PetscBTLookup(change_basis,total_counts)) { 5882 /* get constraint info */ 5883 primal_dofs = constraints_n[total_counts]; 5884 dual_dofs = size_of_constraint-primal_dofs; 5885 5886 if (pcbddc->dbg_flag) { 5887 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5888 } 5889 5890 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5891 5892 /* copy quadrature constraints for change of basis check */ 5893 if (pcbddc->dbg_flag) { 5894 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5895 } 5896 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5897 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5898 5899 /* compute QR decomposition of constraints */ 5900 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5901 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5902 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5903 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5904 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5905 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5906 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5907 5908 /* explictly compute R^-T */ 5909 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5910 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5911 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5912 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5913 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5914 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5915 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5916 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5917 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5918 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5919 5920 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5921 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5922 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5923 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5924 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5925 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5926 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 5927 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 5928 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5929 5930 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 5931 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 5932 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 5933 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5934 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5935 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5936 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5937 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5938 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5939 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5940 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 5941 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5942 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5943 5944 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 5945 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 5946 /* insert cols for primal dofs */ 5947 for (j=0;j<primal_dofs;j++) { 5948 start_vals = &qr_basis[j*size_of_constraint]; 5949 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5950 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5951 } 5952 /* insert cols for dual dofs */ 5953 for (j=0,k=0;j<dual_dofs;k++) { 5954 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 5955 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 5956 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5957 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5958 j++; 5959 } 5960 } 5961 5962 /* check change of basis */ 5963 if (pcbddc->dbg_flag) { 5964 PetscInt ii,jj; 5965 PetscBool valid_qr=PETSC_TRUE; 5966 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 5967 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5968 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 5969 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5970 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 5971 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 5972 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5973 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 5974 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5975 for (jj=0;jj<size_of_constraint;jj++) { 5976 for (ii=0;ii<primal_dofs;ii++) { 5977 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 5978 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 5979 } 5980 } 5981 if (!valid_qr) { 5982 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 5983 for (jj=0;jj<size_of_constraint;jj++) { 5984 for (ii=0;ii<primal_dofs;ii++) { 5985 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 5986 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5987 } 5988 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 5989 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5990 } 5991 } 5992 } 5993 } else { 5994 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 5995 } 5996 } 5997 } else { /* simple transformation block */ 5998 PetscInt row,col; 5999 PetscScalar val,norm; 6000 6001 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6002 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6003 for (j=0;j<size_of_constraint;j++) { 6004 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6005 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6006 if (!PetscBTLookup(is_primal,row_B)) { 6007 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6008 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6009 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6010 } else { 6011 for (k=0;k<size_of_constraint;k++) { 6012 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6013 if (row != col) { 6014 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6015 } else { 6016 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6017 } 6018 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6019 } 6020 } 6021 } 6022 if (pcbddc->dbg_flag) { 6023 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6024 } 6025 } 6026 } else { 6027 if (pcbddc->dbg_flag) { 6028 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6029 } 6030 } 6031 } 6032 6033 /* free workspace */ 6034 if (qr_needed) { 6035 if (pcbddc->dbg_flag) { 6036 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6037 } 6038 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6039 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6040 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6041 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6042 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6043 } 6044 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6045 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6046 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6047 6048 /* assembling of global change of variable */ 6049 if (!pcbddc->fake_change) { 6050 Mat tmat; 6051 PetscInt bs; 6052 6053 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6054 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6055 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6056 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6057 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6058 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6059 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6060 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6061 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6062 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6063 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6064 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6065 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6066 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6067 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6068 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6069 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6070 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6071 6072 /* check */ 6073 if (pcbddc->dbg_flag) { 6074 PetscReal error; 6075 Vec x,x_change; 6076 6077 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6078 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6079 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6080 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6081 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6082 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6083 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6084 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6085 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6086 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6087 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6088 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6089 if (error > PETSC_SMALL) { 6090 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6091 } 6092 ierr = VecDestroy(&x);CHKERRQ(ierr); 6093 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6094 } 6095 /* adapt sub_schurs computed (if any) */ 6096 if (pcbddc->use_deluxe_scaling) { 6097 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6098 6099 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 6100 if (sub_schurs && sub_schurs->S_Ej_all) { 6101 Mat S_new,tmat; 6102 IS is_all_N,is_V_Sall = NULL; 6103 6104 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6105 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6106 if (pcbddc->deluxe_zerorows) { 6107 ISLocalToGlobalMapping NtoSall; 6108 IS is_V; 6109 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6110 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6111 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6112 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6113 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6114 } 6115 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6116 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6117 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6118 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6119 if (pcbddc->deluxe_zerorows) { 6120 const PetscScalar *array; 6121 const PetscInt *idxs_V,*idxs_all; 6122 PetscInt i,n_V; 6123 6124 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6125 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6126 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6127 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6128 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6129 for (i=0;i<n_V;i++) { 6130 PetscScalar val; 6131 PetscInt idx; 6132 6133 idx = idxs_V[i]; 6134 val = array[idxs_all[idxs_V[i]]]; 6135 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6136 } 6137 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6138 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6139 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6140 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6141 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6142 } 6143 sub_schurs->S_Ej_all = S_new; 6144 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6145 if (sub_schurs->sum_S_Ej_all) { 6146 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6147 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6148 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6149 if (pcbddc->deluxe_zerorows) { 6150 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6151 } 6152 sub_schurs->sum_S_Ej_all = S_new; 6153 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6154 } 6155 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6156 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6157 } 6158 /* destroy any change of basis context in sub_schurs */ 6159 if (sub_schurs && sub_schurs->change) { 6160 PetscInt i; 6161 6162 for (i=0;i<sub_schurs->n_subs;i++) { 6163 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6164 } 6165 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6166 } 6167 } 6168 if (pcbddc->switch_static) { /* need to save the local change */ 6169 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6170 } else { 6171 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6172 } 6173 /* determine if any process has changed the pressures locally */ 6174 pcbddc->change_interior = pcbddc->benign_have_null; 6175 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6176 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6177 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6178 pcbddc->use_qr_single = qr_needed; 6179 } 6180 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6181 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6182 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6183 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6184 } else { 6185 Mat benign_global = NULL; 6186 if (pcbddc->benign_have_null) { 6187 Mat tmat; 6188 6189 pcbddc->change_interior = PETSC_TRUE; 6190 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6191 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6192 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6193 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6194 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6195 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6196 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6197 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6198 if (pcbddc->benign_change) { 6199 Mat M; 6200 6201 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6202 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6203 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6204 ierr = MatDestroy(&M);CHKERRQ(ierr); 6205 } else { 6206 Mat eye; 6207 PetscScalar *array; 6208 6209 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6210 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6211 for (i=0;i<pcis->n;i++) { 6212 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6213 } 6214 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6215 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6216 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6217 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6218 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6219 } 6220 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6221 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6222 } 6223 if (pcbddc->user_ChangeOfBasisMatrix) { 6224 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6225 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6226 } else if (pcbddc->benign_have_null) { 6227 pcbddc->ChangeOfBasisMatrix = benign_global; 6228 } 6229 } 6230 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6231 IS is_global; 6232 const PetscInt *gidxs; 6233 6234 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6235 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6236 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6237 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6238 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6239 } 6240 } 6241 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6242 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6243 } 6244 6245 if (!pcbddc->fake_change) { 6246 /* add pressure dofs to set of primal nodes for numbering purposes */ 6247 for (i=0;i<pcbddc->benign_n;i++) { 6248 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6249 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6250 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6251 pcbddc->local_primal_size_cc++; 6252 pcbddc->local_primal_size++; 6253 } 6254 6255 /* check if a new primal space has been introduced (also take into account benign trick) */ 6256 pcbddc->new_primal_space_local = PETSC_TRUE; 6257 if (olocal_primal_size == pcbddc->local_primal_size) { 6258 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6259 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6260 if (!pcbddc->new_primal_space_local) { 6261 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6262 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6263 } 6264 } 6265 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6266 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6267 } 6268 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6269 6270 /* flush dbg viewer */ 6271 if (pcbddc->dbg_flag) { 6272 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6273 } 6274 6275 /* free workspace */ 6276 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6277 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6278 if (!pcbddc->adaptive_selection) { 6279 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6280 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6281 } else { 6282 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6283 pcbddc->adaptive_constraints_idxs_ptr, 6284 pcbddc->adaptive_constraints_data_ptr, 6285 pcbddc->adaptive_constraints_idxs, 6286 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6287 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6288 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6289 } 6290 PetscFunctionReturn(0); 6291 } 6292 6293 #undef __FUNCT__ 6294 #define __FUNCT__ "PCBDDCAnalyzeInterface" 6295 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6296 { 6297 ISLocalToGlobalMapping map; 6298 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6299 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6300 PetscInt ierr,i,N; 6301 6302 PetscFunctionBegin; 6303 if (pcbddc->recompute_topography) { 6304 pcbddc->graphanalyzed = PETSC_FALSE; 6305 /* Reset previously computed graph */ 6306 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6307 /* Init local Graph struct */ 6308 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6309 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6310 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6311 6312 /* Check validity of the csr graph passed in by the user */ 6313 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6314 6315 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6316 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 6317 PetscInt *xadj,*adjncy; 6318 PetscInt nvtxs; 6319 PetscBool flg_row=PETSC_FALSE; 6320 6321 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6322 if (flg_row) { 6323 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6324 pcbddc->computed_rowadj = PETSC_TRUE; 6325 } 6326 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6327 } 6328 if (pcbddc->dbg_flag) { 6329 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6330 } 6331 6332 /* Setup of Graph */ 6333 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6334 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6335 6336 /* attach info on disconnected subdomains if present */ 6337 if (pcbddc->n_local_subs) { 6338 PetscInt *local_subs; 6339 6340 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6341 for (i=0;i<pcbddc->n_local_subs;i++) { 6342 const PetscInt *idxs; 6343 PetscInt nl,j; 6344 6345 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6346 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6347 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6348 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6349 } 6350 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6351 pcbddc->mat_graph->local_subs = local_subs; 6352 } 6353 } 6354 6355 if (!pcbddc->graphanalyzed) { 6356 /* Graph's connected components analysis */ 6357 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6358 pcbddc->graphanalyzed = PETSC_TRUE; 6359 } 6360 PetscFunctionReturn(0); 6361 } 6362 6363 #undef __FUNCT__ 6364 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 6365 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6366 { 6367 PetscInt i,j; 6368 PetscScalar *alphas; 6369 PetscErrorCode ierr; 6370 6371 PetscFunctionBegin; 6372 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6373 for (i=0;i<n;i++) { 6374 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6375 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6376 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6377 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6378 } 6379 ierr = PetscFree(alphas);CHKERRQ(ierr); 6380 PetscFunctionReturn(0); 6381 } 6382 6383 #undef __FUNCT__ 6384 #define __FUNCT__ "PCBDDCMatISGetSubassemblingPattern" 6385 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6386 { 6387 Mat A; 6388 PetscInt n_neighs,*neighs,*n_shared,**shared; 6389 PetscMPIInt size,rank,color; 6390 PetscInt *xadj,*adjncy; 6391 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6392 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 6393 PetscInt void_procs,*procs_candidates = NULL; 6394 PetscInt xadj_count,*count; 6395 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6396 PetscSubcomm psubcomm; 6397 MPI_Comm subcomm; 6398 PetscErrorCode ierr; 6399 6400 PetscFunctionBegin; 6401 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6402 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6403 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6404 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6405 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6406 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6407 6408 if (have_void) *have_void = PETSC_FALSE; 6409 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6410 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6411 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6412 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6413 im_active = !!n; 6414 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6415 void_procs = size - active_procs; 6416 /* get ranks of of non-active processes in mat communicator */ 6417 if (void_procs) { 6418 PetscInt ncand; 6419 6420 if (have_void) *have_void = PETSC_TRUE; 6421 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6422 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6423 for (i=0,ncand=0;i<size;i++) { 6424 if (!procs_candidates[i]) { 6425 procs_candidates[ncand++] = i; 6426 } 6427 } 6428 /* force n_subdomains to be not greater that the number of non-active processes */ 6429 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6430 } 6431 6432 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 6433 number of subdomains requested 1 -> send to master or first candidate in voids */ 6434 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 6435 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 6436 PetscInt issize,isidx,dest; 6437 if (*n_subdomains == 1) dest = 0; 6438 else dest = rank; 6439 if (im_active) { 6440 issize = 1; 6441 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6442 isidx = procs_candidates[dest]; 6443 } else { 6444 isidx = dest; 6445 } 6446 } else { 6447 issize = 0; 6448 isidx = -1; 6449 } 6450 if (*n_subdomains != 1) *n_subdomains = active_procs; 6451 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6452 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6453 PetscFunctionReturn(0); 6454 } 6455 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6456 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6457 threshold = PetscMax(threshold,2); 6458 6459 /* Get info on mapping */ 6460 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6461 6462 /* build local CSR graph of subdomains' connectivity */ 6463 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6464 xadj[0] = 0; 6465 xadj[1] = PetscMax(n_neighs-1,0); 6466 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6467 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6468 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 6469 for (i=1;i<n_neighs;i++) 6470 for (j=0;j<n_shared[i];j++) 6471 count[shared[i][j]] += 1; 6472 6473 xadj_count = 0; 6474 for (i=1;i<n_neighs;i++) { 6475 for (j=0;j<n_shared[i];j++) { 6476 if (count[shared[i][j]] < threshold) { 6477 adjncy[xadj_count] = neighs[i]; 6478 adjncy_wgt[xadj_count] = n_shared[i]; 6479 xadj_count++; 6480 break; 6481 } 6482 } 6483 } 6484 xadj[1] = xadj_count; 6485 ierr = PetscFree(count);CHKERRQ(ierr); 6486 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6487 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6488 6489 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6490 6491 /* Restrict work on active processes only */ 6492 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6493 if (void_procs) { 6494 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6495 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6496 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6497 subcomm = PetscSubcommChild(psubcomm); 6498 } else { 6499 psubcomm = NULL; 6500 subcomm = PetscObjectComm((PetscObject)mat); 6501 } 6502 6503 v_wgt = NULL; 6504 if (!color) { 6505 ierr = PetscFree(xadj);CHKERRQ(ierr); 6506 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6507 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6508 } else { 6509 Mat subdomain_adj; 6510 IS new_ranks,new_ranks_contig; 6511 MatPartitioning partitioner; 6512 PetscInt rstart=0,rend=0; 6513 PetscInt *is_indices,*oldranks; 6514 PetscMPIInt size; 6515 PetscBool aggregate; 6516 6517 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6518 if (void_procs) { 6519 PetscInt prank = rank; 6520 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6521 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6522 for (i=0;i<xadj[1];i++) { 6523 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6524 } 6525 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6526 } else { 6527 oldranks = NULL; 6528 } 6529 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6530 if (aggregate) { /* TODO: all this part could be made more efficient */ 6531 PetscInt lrows,row,ncols,*cols; 6532 PetscMPIInt nrank; 6533 PetscScalar *vals; 6534 6535 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6536 lrows = 0; 6537 if (nrank<redprocs) { 6538 lrows = size/redprocs; 6539 if (nrank<size%redprocs) lrows++; 6540 } 6541 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6542 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6543 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6544 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6545 row = nrank; 6546 ncols = xadj[1]-xadj[0]; 6547 cols = adjncy; 6548 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6549 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6550 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6551 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6552 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6553 ierr = PetscFree(xadj);CHKERRQ(ierr); 6554 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6555 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6556 ierr = PetscFree(vals);CHKERRQ(ierr); 6557 if (use_vwgt) { 6558 Vec v; 6559 const PetscScalar *array; 6560 PetscInt nl; 6561 6562 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6563 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 6564 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6565 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6566 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6567 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6568 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6569 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6570 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6571 ierr = VecDestroy(&v);CHKERRQ(ierr); 6572 } 6573 } else { 6574 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6575 if (use_vwgt) { 6576 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6577 v_wgt[0] = n; 6578 } 6579 } 6580 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6581 6582 /* Partition */ 6583 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6584 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6585 if (v_wgt) { 6586 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6587 } 6588 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6589 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6590 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6591 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6592 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6593 6594 /* renumber new_ranks to avoid "holes" in new set of processors */ 6595 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6596 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6597 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6598 if (!aggregate) { 6599 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6600 #if defined(PETSC_USE_DEBUG) 6601 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6602 #endif 6603 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6604 } else if (oldranks) { 6605 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6606 } else { 6607 ranks_send_to_idx[0] = is_indices[0]; 6608 } 6609 } else { 6610 PetscInt idxs[1]; 6611 PetscMPIInt tag; 6612 MPI_Request *reqs; 6613 6614 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6615 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6616 for (i=rstart;i<rend;i++) { 6617 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6618 } 6619 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6620 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6621 ierr = PetscFree(reqs);CHKERRQ(ierr); 6622 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6623 #if defined(PETSC_USE_DEBUG) 6624 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6625 #endif 6626 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6627 } else if (oldranks) { 6628 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6629 } else { 6630 ranks_send_to_idx[0] = idxs[0]; 6631 } 6632 } 6633 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6634 /* clean up */ 6635 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6636 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6637 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6638 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6639 } 6640 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6641 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6642 6643 /* assemble parallel IS for sends */ 6644 i = 1; 6645 if (!color) i=0; 6646 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6647 PetscFunctionReturn(0); 6648 } 6649 6650 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6651 6652 #undef __FUNCT__ 6653 #define __FUNCT__ "PCBDDCMatISSubassemble" 6654 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6655 { 6656 Mat local_mat; 6657 IS is_sends_internal; 6658 PetscInt rows,cols,new_local_rows; 6659 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6660 PetscBool ismatis,isdense,newisdense,destroy_mat; 6661 ISLocalToGlobalMapping l2gmap; 6662 PetscInt* l2gmap_indices; 6663 const PetscInt* is_indices; 6664 MatType new_local_type; 6665 /* buffers */ 6666 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6667 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6668 PetscInt *recv_buffer_idxs_local; 6669 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6670 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6671 /* MPI */ 6672 MPI_Comm comm,comm_n; 6673 PetscSubcomm subcomm; 6674 PetscMPIInt n_sends,n_recvs,commsize; 6675 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6676 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6677 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6678 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6679 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6680 PetscErrorCode ierr; 6681 6682 PetscFunctionBegin; 6683 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6684 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6685 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6686 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6687 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6688 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6689 PetscValidLogicalCollectiveBool(mat,reuse,6); 6690 PetscValidLogicalCollectiveInt(mat,nis,8); 6691 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6692 if (nvecs) { 6693 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6694 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6695 } 6696 /* further checks */ 6697 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6698 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6699 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6700 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6701 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6702 if (reuse && *mat_n) { 6703 PetscInt mrows,mcols,mnrows,mncols; 6704 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6705 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6706 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6707 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6708 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6709 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6710 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6711 } 6712 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6713 PetscValidLogicalCollectiveInt(mat,bs,0); 6714 6715 /* prepare IS for sending if not provided */ 6716 if (!is_sends) { 6717 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6718 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6719 } else { 6720 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6721 is_sends_internal = is_sends; 6722 } 6723 6724 /* get comm */ 6725 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6726 6727 /* compute number of sends */ 6728 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6729 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6730 6731 /* compute number of receives */ 6732 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6733 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6734 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6735 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6736 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6737 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6738 ierr = PetscFree(iflags);CHKERRQ(ierr); 6739 6740 /* restrict comm if requested */ 6741 subcomm = 0; 6742 destroy_mat = PETSC_FALSE; 6743 if (restrict_comm) { 6744 PetscMPIInt color,subcommsize; 6745 6746 color = 0; 6747 if (restrict_full) { 6748 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6749 } else { 6750 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6751 } 6752 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6753 subcommsize = commsize - subcommsize; 6754 /* check if reuse has been requested */ 6755 if (reuse) { 6756 if (*mat_n) { 6757 PetscMPIInt subcommsize2; 6758 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6759 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6760 comm_n = PetscObjectComm((PetscObject)*mat_n); 6761 } else { 6762 comm_n = PETSC_COMM_SELF; 6763 } 6764 } else { /* MAT_INITIAL_MATRIX */ 6765 PetscMPIInt rank; 6766 6767 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6768 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6769 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6770 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6771 comm_n = PetscSubcommChild(subcomm); 6772 } 6773 /* flag to destroy *mat_n if not significative */ 6774 if (color) destroy_mat = PETSC_TRUE; 6775 } else { 6776 comm_n = comm; 6777 } 6778 6779 /* prepare send/receive buffers */ 6780 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6781 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6782 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6783 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6784 if (nis) { 6785 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6786 } 6787 6788 /* Get data from local matrices */ 6789 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6790 /* TODO: See below some guidelines on how to prepare the local buffers */ 6791 /* 6792 send_buffer_vals should contain the raw values of the local matrix 6793 send_buffer_idxs should contain: 6794 - MatType_PRIVATE type 6795 - PetscInt size_of_l2gmap 6796 - PetscInt global_row_indices[size_of_l2gmap] 6797 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6798 */ 6799 else { 6800 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6801 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6802 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6803 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6804 send_buffer_idxs[1] = i; 6805 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6806 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6807 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6808 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6809 for (i=0;i<n_sends;i++) { 6810 ilengths_vals[is_indices[i]] = len*len; 6811 ilengths_idxs[is_indices[i]] = len+2; 6812 } 6813 } 6814 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6815 /* additional is (if any) */ 6816 if (nis) { 6817 PetscMPIInt psum; 6818 PetscInt j; 6819 for (j=0,psum=0;j<nis;j++) { 6820 PetscInt plen; 6821 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6822 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6823 psum += len+1; /* indices + lenght */ 6824 } 6825 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6826 for (j=0,psum=0;j<nis;j++) { 6827 PetscInt plen; 6828 const PetscInt *is_array_idxs; 6829 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6830 send_buffer_idxs_is[psum] = plen; 6831 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6832 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6833 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6834 psum += plen+1; /* indices + lenght */ 6835 } 6836 for (i=0;i<n_sends;i++) { 6837 ilengths_idxs_is[is_indices[i]] = psum; 6838 } 6839 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6840 } 6841 6842 buf_size_idxs = 0; 6843 buf_size_vals = 0; 6844 buf_size_idxs_is = 0; 6845 buf_size_vecs = 0; 6846 for (i=0;i<n_recvs;i++) { 6847 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6848 buf_size_vals += (PetscInt)olengths_vals[i]; 6849 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6850 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6851 } 6852 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6853 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6854 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6855 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6856 6857 /* get new tags for clean communications */ 6858 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6859 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6860 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6861 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6862 6863 /* allocate for requests */ 6864 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6865 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6866 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6867 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6868 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6869 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6870 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6871 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6872 6873 /* communications */ 6874 ptr_idxs = recv_buffer_idxs; 6875 ptr_vals = recv_buffer_vals; 6876 ptr_idxs_is = recv_buffer_idxs_is; 6877 ptr_vecs = recv_buffer_vecs; 6878 for (i=0;i<n_recvs;i++) { 6879 source_dest = onodes[i]; 6880 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6881 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6882 ptr_idxs += olengths_idxs[i]; 6883 ptr_vals += olengths_vals[i]; 6884 if (nis) { 6885 source_dest = onodes_is[i]; 6886 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6887 ptr_idxs_is += olengths_idxs_is[i]; 6888 } 6889 if (nvecs) { 6890 source_dest = onodes[i]; 6891 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6892 ptr_vecs += olengths_idxs[i]-2; 6893 } 6894 } 6895 for (i=0;i<n_sends;i++) { 6896 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6897 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6898 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6899 if (nis) { 6900 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6901 } 6902 if (nvecs) { 6903 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6904 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6905 } 6906 } 6907 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6908 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6909 6910 /* assemble new l2g map */ 6911 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6912 ptr_idxs = recv_buffer_idxs; 6913 new_local_rows = 0; 6914 for (i=0;i<n_recvs;i++) { 6915 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6916 ptr_idxs += olengths_idxs[i]; 6917 } 6918 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6919 ptr_idxs = recv_buffer_idxs; 6920 new_local_rows = 0; 6921 for (i=0;i<n_recvs;i++) { 6922 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6923 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6924 ptr_idxs += olengths_idxs[i]; 6925 } 6926 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 6927 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 6928 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 6929 6930 /* infer new local matrix type from received local matrices type */ 6931 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 6932 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 6933 if (n_recvs) { 6934 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 6935 ptr_idxs = recv_buffer_idxs; 6936 for (i=0;i<n_recvs;i++) { 6937 if ((PetscInt)new_local_type_private != *ptr_idxs) { 6938 new_local_type_private = MATAIJ_PRIVATE; 6939 break; 6940 } 6941 ptr_idxs += olengths_idxs[i]; 6942 } 6943 switch (new_local_type_private) { 6944 case MATDENSE_PRIVATE: 6945 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 6946 new_local_type = MATSEQAIJ; 6947 bs = 1; 6948 } else { /* if I receive only 1 dense matrix */ 6949 new_local_type = MATSEQDENSE; 6950 bs = 1; 6951 } 6952 break; 6953 case MATAIJ_PRIVATE: 6954 new_local_type = MATSEQAIJ; 6955 bs = 1; 6956 break; 6957 case MATBAIJ_PRIVATE: 6958 new_local_type = MATSEQBAIJ; 6959 break; 6960 case MATSBAIJ_PRIVATE: 6961 new_local_type = MATSEQSBAIJ; 6962 break; 6963 default: 6964 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 6965 break; 6966 } 6967 } else { /* by default, new_local_type is seqdense */ 6968 new_local_type = MATSEQDENSE; 6969 bs = 1; 6970 } 6971 6972 /* create MATIS object if needed */ 6973 if (!reuse) { 6974 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 6975 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6976 } else { 6977 /* it also destroys the local matrices */ 6978 if (*mat_n) { 6979 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 6980 } else { /* this is a fake object */ 6981 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6982 } 6983 } 6984 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 6985 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 6986 6987 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6988 6989 /* Global to local map of received indices */ 6990 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 6991 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 6992 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 6993 6994 /* restore attributes -> type of incoming data and its size */ 6995 buf_size_idxs = 0; 6996 for (i=0;i<n_recvs;i++) { 6997 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 6998 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 6999 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7000 } 7001 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7002 7003 /* set preallocation */ 7004 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7005 if (!newisdense) { 7006 PetscInt *new_local_nnz=0; 7007 7008 ptr_idxs = recv_buffer_idxs_local; 7009 if (n_recvs) { 7010 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7011 } 7012 for (i=0;i<n_recvs;i++) { 7013 PetscInt j; 7014 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7015 for (j=0;j<*(ptr_idxs+1);j++) { 7016 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7017 } 7018 } else { 7019 /* TODO */ 7020 } 7021 ptr_idxs += olengths_idxs[i]; 7022 } 7023 if (new_local_nnz) { 7024 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7025 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7026 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7027 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7028 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7029 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7030 } else { 7031 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7032 } 7033 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7034 } else { 7035 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7036 } 7037 7038 /* set values */ 7039 ptr_vals = recv_buffer_vals; 7040 ptr_idxs = recv_buffer_idxs_local; 7041 for (i=0;i<n_recvs;i++) { 7042 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7043 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7044 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7045 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7046 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7047 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7048 } else { 7049 /* TODO */ 7050 } 7051 ptr_idxs += olengths_idxs[i]; 7052 ptr_vals += olengths_vals[i]; 7053 } 7054 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7055 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7056 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7057 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7058 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7059 7060 #if 0 7061 if (!restrict_comm) { /* check */ 7062 Vec lvec,rvec; 7063 PetscReal infty_error; 7064 7065 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7066 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7067 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7068 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7069 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7070 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7071 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7072 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7073 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7074 } 7075 #endif 7076 7077 /* assemble new additional is (if any) */ 7078 if (nis) { 7079 PetscInt **temp_idxs,*count_is,j,psum; 7080 7081 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7082 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7083 ptr_idxs = recv_buffer_idxs_is; 7084 psum = 0; 7085 for (i=0;i<n_recvs;i++) { 7086 for (j=0;j<nis;j++) { 7087 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7088 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7089 psum += plen; 7090 ptr_idxs += plen+1; /* shift pointer to received data */ 7091 } 7092 } 7093 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7094 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7095 for (i=1;i<nis;i++) { 7096 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7097 } 7098 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7099 ptr_idxs = recv_buffer_idxs_is; 7100 for (i=0;i<n_recvs;i++) { 7101 for (j=0;j<nis;j++) { 7102 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7103 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7104 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7105 ptr_idxs += plen+1; /* shift pointer to received data */ 7106 } 7107 } 7108 for (i=0;i<nis;i++) { 7109 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7110 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7111 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7112 } 7113 ierr = PetscFree(count_is);CHKERRQ(ierr); 7114 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7115 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7116 } 7117 /* free workspace */ 7118 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7119 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7120 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7121 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7122 if (isdense) { 7123 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7124 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7125 } else { 7126 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7127 } 7128 if (nis) { 7129 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7130 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7131 } 7132 7133 if (nvecs) { 7134 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7135 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7136 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7137 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7138 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7139 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7140 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7141 /* set values */ 7142 ptr_vals = recv_buffer_vecs; 7143 ptr_idxs = recv_buffer_idxs_local; 7144 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7145 for (i=0;i<n_recvs;i++) { 7146 PetscInt j; 7147 for (j=0;j<*(ptr_idxs+1);j++) { 7148 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7149 } 7150 ptr_idxs += olengths_idxs[i]; 7151 ptr_vals += olengths_idxs[i]-2; 7152 } 7153 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7154 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7155 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7156 } 7157 7158 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7159 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7160 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7161 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7162 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7163 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7164 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7165 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7166 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7167 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7168 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7169 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7170 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7171 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7172 ierr = PetscFree(onodes);CHKERRQ(ierr); 7173 if (nis) { 7174 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7175 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7176 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7177 } 7178 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7179 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7180 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7181 for (i=0;i<nis;i++) { 7182 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7183 } 7184 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7185 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7186 } 7187 *mat_n = NULL; 7188 } 7189 PetscFunctionReturn(0); 7190 } 7191 7192 /* temporary hack into ksp private data structure */ 7193 #include <petsc/private/kspimpl.h> 7194 7195 #undef __FUNCT__ 7196 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 7197 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7198 { 7199 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7200 PC_IS *pcis = (PC_IS*)pc->data; 7201 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7202 Mat coarsedivudotp = NULL; 7203 Mat coarseG,t_coarse_mat_is; 7204 MatNullSpace CoarseNullSpace = NULL; 7205 ISLocalToGlobalMapping coarse_islg; 7206 IS coarse_is,*isarray; 7207 PetscInt i,im_active=-1,active_procs=-1; 7208 PetscInt nis,nisdofs,nisneu,nisvert; 7209 PC pc_temp; 7210 PCType coarse_pc_type; 7211 KSPType coarse_ksp_type; 7212 PetscBool multilevel_requested,multilevel_allowed; 7213 PetscBool isredundant,isbddc,isnn,coarse_reuse; 7214 PetscInt ncoarse,nedcfield; 7215 PetscBool compute_vecs = PETSC_FALSE; 7216 PetscScalar *array; 7217 MatReuse coarse_mat_reuse; 7218 PetscBool restr, full_restr, have_void; 7219 PetscErrorCode ierr; 7220 7221 PetscFunctionBegin; 7222 /* Assign global numbering to coarse dofs */ 7223 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7224 PetscInt ocoarse_size; 7225 compute_vecs = PETSC_TRUE; 7226 ocoarse_size = pcbddc->coarse_size; 7227 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7228 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7229 /* see if we can avoid some work */ 7230 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7231 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7232 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7233 PC pc; 7234 PetscBool isbddc; 7235 7236 /* temporary workaround since PCBDDC does not have a reset method so far */ 7237 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 7238 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 7239 if (isbddc) { 7240 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 7241 } else { 7242 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7243 } 7244 coarse_reuse = PETSC_FALSE; 7245 } else { /* we can safely reuse already computed coarse matrix */ 7246 coarse_reuse = PETSC_TRUE; 7247 } 7248 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7249 coarse_reuse = PETSC_FALSE; 7250 } 7251 /* reset any subassembling information */ 7252 if (!coarse_reuse || pcbddc->recompute_topography) { 7253 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7254 } 7255 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7256 coarse_reuse = PETSC_TRUE; 7257 } 7258 /* assemble coarse matrix */ 7259 if (coarse_reuse && pcbddc->coarse_ksp) { 7260 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7261 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7262 coarse_mat_reuse = MAT_REUSE_MATRIX; 7263 } else { 7264 coarse_mat = NULL; 7265 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7266 } 7267 7268 /* creates temporary l2gmap and IS for coarse indexes */ 7269 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7270 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7271 7272 /* creates temporary MATIS object for coarse matrix */ 7273 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7274 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7275 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7276 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7277 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7278 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7279 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7280 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7281 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7282 7283 /* count "active" (i.e. with positive local size) and "void" processes */ 7284 im_active = !!(pcis->n); 7285 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7286 7287 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7288 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7289 /* full_restr : just use the receivers from the subassembling pattern */ 7290 coarse_mat_is = NULL; 7291 multilevel_allowed = PETSC_FALSE; 7292 multilevel_requested = PETSC_FALSE; 7293 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7294 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7295 if (multilevel_requested) { 7296 ncoarse = active_procs/pcbddc->coarsening_ratio; 7297 restr = PETSC_FALSE; 7298 full_restr = PETSC_FALSE; 7299 } else { 7300 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7301 restr = PETSC_TRUE; 7302 full_restr = PETSC_TRUE; 7303 } 7304 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7305 ncoarse = PetscMax(1,ncoarse); 7306 if (!pcbddc->coarse_subassembling) { 7307 if (pcbddc->coarsening_ratio > 1) { 7308 if (multilevel_requested) { 7309 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7310 } else { 7311 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7312 } 7313 } else { 7314 PetscMPIInt size,rank; 7315 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 7316 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7317 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 7318 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7319 } 7320 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7321 PetscInt psum; 7322 PetscMPIInt size; 7323 if (pcbddc->coarse_ksp) psum = 1; 7324 else psum = 0; 7325 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7326 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 7327 if (ncoarse < size) have_void = PETSC_TRUE; 7328 } 7329 /* determine if we can go multilevel */ 7330 if (multilevel_requested) { 7331 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7332 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7333 } 7334 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7335 7336 /* dump subassembling pattern */ 7337 if (pcbddc->dbg_flag && multilevel_allowed) { 7338 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7339 } 7340 7341 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7342 nedcfield = -1; 7343 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7344 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7345 const PetscInt *idxs; 7346 ISLocalToGlobalMapping tmap; 7347 7348 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7349 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7350 /* allocate space for temporary storage */ 7351 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7352 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7353 /* allocate for IS array */ 7354 nisdofs = pcbddc->n_ISForDofsLocal; 7355 if (pcbddc->nedclocal) { 7356 if (pcbddc->nedfield > -1) { 7357 nedcfield = pcbddc->nedfield; 7358 } else { 7359 nedcfield = 0; 7360 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7361 nisdofs = 1; 7362 } 7363 } 7364 nisneu = !!pcbddc->NeumannBoundariesLocal; 7365 nisvert = 0; /* nisvert is not used */ 7366 nis = nisdofs + nisneu + nisvert; 7367 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7368 /* dofs splitting */ 7369 for (i=0;i<nisdofs;i++) { 7370 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7371 if (nedcfield != i) { 7372 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7373 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7374 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7375 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7376 } else { 7377 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7378 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7379 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7380 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7381 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7382 } 7383 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7384 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7385 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7386 } 7387 /* neumann boundaries */ 7388 if (pcbddc->NeumannBoundariesLocal) { 7389 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7390 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7391 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7392 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7393 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7394 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7395 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7396 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7397 } 7398 /* free memory */ 7399 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7400 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7401 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7402 } else { 7403 nis = 0; 7404 nisdofs = 0; 7405 nisneu = 0; 7406 nisvert = 0; 7407 isarray = NULL; 7408 } 7409 /* destroy no longer needed map */ 7410 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7411 7412 /* subassemble */ 7413 if (multilevel_allowed) { 7414 Vec vp[1]; 7415 PetscInt nvecs = 0; 7416 PetscBool reuse,reuser; 7417 7418 if (coarse_mat) reuse = PETSC_TRUE; 7419 else reuse = PETSC_FALSE; 7420 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7421 vp[0] = NULL; 7422 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7423 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7424 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7425 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7426 nvecs = 1; 7427 7428 if (pcbddc->divudotp) { 7429 Mat B,loc_divudotp; 7430 Vec v,p; 7431 IS dummy; 7432 PetscInt np; 7433 7434 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7435 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7436 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7437 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7438 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7439 ierr = VecSet(p,1.);CHKERRQ(ierr); 7440 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7441 ierr = VecDestroy(&p);CHKERRQ(ierr); 7442 ierr = MatDestroy(&B);CHKERRQ(ierr); 7443 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7444 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7445 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7446 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7447 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7448 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7449 ierr = VecDestroy(&v);CHKERRQ(ierr); 7450 } 7451 } 7452 if (reuser) { 7453 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7454 } else { 7455 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7456 } 7457 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7458 PetscScalar *arraym,*arrayv; 7459 PetscInt nl; 7460 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7461 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7462 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7463 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7464 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7465 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7466 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7467 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7468 } else { 7469 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7470 } 7471 } else { 7472 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7473 } 7474 if (coarse_mat_is || coarse_mat) { 7475 PetscMPIInt size; 7476 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7477 if (!multilevel_allowed) { 7478 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7479 } else { 7480 Mat A; 7481 7482 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7483 if (coarse_mat_is) { 7484 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7485 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7486 coarse_mat = coarse_mat_is; 7487 } 7488 /* be sure we don't have MatSeqDENSE as local mat */ 7489 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7490 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7491 } 7492 } 7493 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7494 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7495 7496 /* create local to global scatters for coarse problem */ 7497 if (compute_vecs) { 7498 PetscInt lrows; 7499 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7500 if (coarse_mat) { 7501 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7502 } else { 7503 lrows = 0; 7504 } 7505 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7506 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7507 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7508 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7509 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7510 } 7511 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7512 7513 /* set defaults for coarse KSP and PC */ 7514 if (multilevel_allowed) { 7515 coarse_ksp_type = KSPRICHARDSON; 7516 coarse_pc_type = PCBDDC; 7517 } else { 7518 coarse_ksp_type = KSPPREONLY; 7519 coarse_pc_type = PCREDUNDANT; 7520 } 7521 7522 /* print some info if requested */ 7523 if (pcbddc->dbg_flag) { 7524 if (!multilevel_allowed) { 7525 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7526 if (multilevel_requested) { 7527 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7528 } else if (pcbddc->max_levels) { 7529 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7530 } 7531 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7532 } 7533 } 7534 7535 /* communicate coarse discrete gradient */ 7536 coarseG = NULL; 7537 if (pcbddc->nedcG && multilevel_allowed) { 7538 MPI_Comm ccomm; 7539 if (coarse_mat) { 7540 ccomm = PetscObjectComm((PetscObject)coarse_mat); 7541 } else { 7542 ccomm = MPI_COMM_NULL; 7543 } 7544 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 7545 } 7546 7547 /* create the coarse KSP object only once with defaults */ 7548 if (coarse_mat) { 7549 PetscViewer dbg_viewer = NULL; 7550 if (pcbddc->dbg_flag) { 7551 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7552 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7553 } 7554 if (!pcbddc->coarse_ksp) { 7555 char prefix[256],str_level[16]; 7556 size_t len; 7557 7558 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7559 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7560 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7561 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7562 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7563 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7564 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7565 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7566 /* TODO is this logic correct? should check for coarse_mat type */ 7567 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7568 /* prefix */ 7569 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7570 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7571 if (!pcbddc->current_level) { 7572 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7573 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7574 } else { 7575 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7576 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7577 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7578 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7579 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7580 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7581 } 7582 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7583 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7584 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7585 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7586 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7587 /* allow user customization */ 7588 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7589 } 7590 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7591 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7592 if (nisdofs) { 7593 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7594 for (i=0;i<nisdofs;i++) { 7595 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7596 } 7597 } 7598 if (nisneu) { 7599 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7600 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7601 } 7602 if (nisvert) { 7603 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7604 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7605 } 7606 if (coarseG) { 7607 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 7608 } 7609 7610 /* get some info after set from options */ 7611 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7612 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7613 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7614 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 7615 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7616 isbddc = PETSC_FALSE; 7617 } 7618 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7619 if (isredundant) { 7620 KSP inner_ksp; 7621 PC inner_pc; 7622 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7623 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7624 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7625 } 7626 7627 /* parameters which miss an API */ 7628 if (isbddc) { 7629 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7630 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7631 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7632 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7633 if (pcbddc_coarse->benign_saddle_point) { 7634 Mat coarsedivudotp_is; 7635 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7636 IS row,col; 7637 const PetscInt *gidxs; 7638 PetscInt n,st,M,N; 7639 7640 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7641 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7642 st = st-n; 7643 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7644 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7645 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7646 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7647 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7648 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7649 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7650 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7651 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7652 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7653 ierr = ISDestroy(&row);CHKERRQ(ierr); 7654 ierr = ISDestroy(&col);CHKERRQ(ierr); 7655 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7656 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7657 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7658 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7659 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7660 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7661 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7662 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7663 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7664 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7665 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7666 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7667 } 7668 } 7669 7670 /* propagate symmetry info of coarse matrix */ 7671 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7672 if (pc->pmat->symmetric_set) { 7673 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7674 } 7675 if (pc->pmat->hermitian_set) { 7676 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7677 } 7678 if (pc->pmat->spd_set) { 7679 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7680 } 7681 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7682 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7683 } 7684 /* set operators */ 7685 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7686 if (pcbddc->dbg_flag) { 7687 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7688 } 7689 } 7690 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 7691 ierr = PetscFree(isarray);CHKERRQ(ierr); 7692 #if 0 7693 { 7694 PetscViewer viewer; 7695 char filename[256]; 7696 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7697 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7698 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7699 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7700 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7701 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7702 } 7703 #endif 7704 7705 if (pcbddc->coarse_ksp) { 7706 Vec crhs,csol; 7707 7708 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7709 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7710 if (!csol) { 7711 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7712 } 7713 if (!crhs) { 7714 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7715 } 7716 } 7717 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7718 7719 /* compute null space for coarse solver if the benign trick has been requested */ 7720 if (pcbddc->benign_null) { 7721 7722 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7723 for (i=0;i<pcbddc->benign_n;i++) { 7724 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7725 } 7726 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7727 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7728 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7729 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7730 if (coarse_mat) { 7731 Vec nullv; 7732 PetscScalar *array,*array2; 7733 PetscInt nl; 7734 7735 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7736 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7737 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7738 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7739 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7740 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7741 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7742 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7743 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7744 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7745 } 7746 } 7747 7748 if (pcbddc->coarse_ksp) { 7749 PetscBool ispreonly; 7750 7751 if (CoarseNullSpace) { 7752 PetscBool isnull; 7753 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7754 if (isnull) { 7755 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7756 } 7757 /* TODO: add local nullspaces (if any) */ 7758 } 7759 /* setup coarse ksp */ 7760 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7761 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7762 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7763 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7764 KSP check_ksp; 7765 KSPType check_ksp_type; 7766 PC check_pc; 7767 Vec check_vec,coarse_vec; 7768 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7769 PetscInt its; 7770 PetscBool compute_eigs; 7771 PetscReal *eigs_r,*eigs_c; 7772 PetscInt neigs; 7773 const char *prefix; 7774 7775 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7776 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7777 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7778 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7779 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7780 /* prevent from setup unneeded object */ 7781 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7782 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7783 if (ispreonly) { 7784 check_ksp_type = KSPPREONLY; 7785 compute_eigs = PETSC_FALSE; 7786 } else { 7787 check_ksp_type = KSPGMRES; 7788 compute_eigs = PETSC_TRUE; 7789 } 7790 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7791 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7792 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7793 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7794 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7795 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7796 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7797 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7798 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7799 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7800 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7801 /* create random vec */ 7802 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7803 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7804 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7805 /* solve coarse problem */ 7806 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7807 /* set eigenvalue estimation if preonly has not been requested */ 7808 if (compute_eigs) { 7809 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7810 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7811 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7812 if (neigs) { 7813 lambda_max = eigs_r[neigs-1]; 7814 lambda_min = eigs_r[0]; 7815 if (pcbddc->use_coarse_estimates) { 7816 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7817 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7818 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7819 } 7820 } 7821 } 7822 } 7823 7824 /* check coarse problem residual error */ 7825 if (pcbddc->dbg_flag) { 7826 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7827 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7828 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7829 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7830 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7831 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7832 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7833 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7834 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7835 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7836 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7837 if (CoarseNullSpace) { 7838 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7839 } 7840 if (compute_eigs) { 7841 PetscReal lambda_max_s,lambda_min_s; 7842 KSPConvergedReason reason; 7843 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7844 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7845 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 7846 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7847 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7848 for (i=0;i<neigs;i++) { 7849 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7850 } 7851 } 7852 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7853 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7854 } 7855 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7856 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7857 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7858 if (compute_eigs) { 7859 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7860 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7861 } 7862 } 7863 } 7864 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7865 /* print additional info */ 7866 if (pcbddc->dbg_flag) { 7867 /* waits until all processes reaches this point */ 7868 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7869 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7870 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7871 } 7872 7873 /* free memory */ 7874 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7875 PetscFunctionReturn(0); 7876 } 7877 7878 #undef __FUNCT__ 7879 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 7880 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7881 { 7882 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7883 PC_IS* pcis = (PC_IS*)pc->data; 7884 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7885 IS subset,subset_mult,subset_n; 7886 PetscInt local_size,coarse_size=0; 7887 PetscInt *local_primal_indices=NULL; 7888 const PetscInt *t_local_primal_indices; 7889 PetscErrorCode ierr; 7890 7891 PetscFunctionBegin; 7892 /* Compute global number of coarse dofs */ 7893 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7894 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7895 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7896 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7897 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7898 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7899 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7900 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7901 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7902 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7903 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7904 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7905 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7906 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7907 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7908 7909 /* check numbering */ 7910 if (pcbddc->dbg_flag) { 7911 PetscScalar coarsesum,*array,*array2; 7912 PetscInt i; 7913 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7914 7915 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7916 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7917 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7918 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7919 /* counter */ 7920 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7921 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7922 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7923 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7924 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7925 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7926 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7927 for (i=0;i<pcbddc->local_primal_size;i++) { 7928 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7929 } 7930 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7931 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7932 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7933 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7934 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7935 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7936 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7937 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7938 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7939 for (i=0;i<pcis->n;i++) { 7940 if (array[i] != 0.0 && array[i] != array2[i]) { 7941 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 7942 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 7943 set_error = PETSC_TRUE; 7944 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 7945 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 7946 } 7947 } 7948 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7949 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7950 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7951 for (i=0;i<pcis->n;i++) { 7952 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 7953 } 7954 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7955 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7956 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7957 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7958 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 7959 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 7960 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 7961 PetscInt *gidxs; 7962 7963 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 7964 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 7965 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 7966 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7967 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 7968 for (i=0;i<pcbddc->local_primal_size;i++) { 7969 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 7970 } 7971 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7972 ierr = PetscFree(gidxs);CHKERRQ(ierr); 7973 } 7974 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7975 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7976 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 7977 } 7978 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 7979 /* get back data */ 7980 *coarse_size_n = coarse_size; 7981 *local_primal_indices_n = local_primal_indices; 7982 PetscFunctionReturn(0); 7983 } 7984 7985 #undef __FUNCT__ 7986 #define __FUNCT__ "PCBDDCGlobalToLocal" 7987 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 7988 { 7989 IS localis_t; 7990 PetscInt i,lsize,*idxs,n; 7991 PetscScalar *vals; 7992 PetscErrorCode ierr; 7993 7994 PetscFunctionBegin; 7995 /* get indices in local ordering exploiting local to global map */ 7996 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 7997 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 7998 for (i=0;i<lsize;i++) vals[i] = 1.0; 7999 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8000 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8001 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8002 if (idxs) { /* multilevel guard */ 8003 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8004 } 8005 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8006 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8007 ierr = PetscFree(vals);CHKERRQ(ierr); 8008 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8009 /* now compute set in local ordering */ 8010 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8011 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8012 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8013 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8014 for (i=0,lsize=0;i<n;i++) { 8015 if (PetscRealPart(vals[i]) > 0.5) { 8016 lsize++; 8017 } 8018 } 8019 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8020 for (i=0,lsize=0;i<n;i++) { 8021 if (PetscRealPart(vals[i]) > 0.5) { 8022 idxs[lsize++] = i; 8023 } 8024 } 8025 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8026 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8027 *localis = localis_t; 8028 PetscFunctionReturn(0); 8029 } 8030 8031 #undef __FUNCT__ 8032 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 8033 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8034 { 8035 PC_IS *pcis=(PC_IS*)pc->data; 8036 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8037 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8038 Mat S_j; 8039 PetscInt *used_xadj,*used_adjncy; 8040 PetscBool free_used_adj; 8041 PetscErrorCode ierr; 8042 8043 PetscFunctionBegin; 8044 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8045 free_used_adj = PETSC_FALSE; 8046 if (pcbddc->sub_schurs_layers == -1) { 8047 used_xadj = NULL; 8048 used_adjncy = NULL; 8049 } else { 8050 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8051 used_xadj = pcbddc->mat_graph->xadj; 8052 used_adjncy = pcbddc->mat_graph->adjncy; 8053 } else if (pcbddc->computed_rowadj) { 8054 used_xadj = pcbddc->mat_graph->xadj; 8055 used_adjncy = pcbddc->mat_graph->adjncy; 8056 } else { 8057 PetscBool flg_row=PETSC_FALSE; 8058 const PetscInt *xadj,*adjncy; 8059 PetscInt nvtxs; 8060 8061 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8062 if (flg_row) { 8063 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8064 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8065 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8066 free_used_adj = PETSC_TRUE; 8067 } else { 8068 pcbddc->sub_schurs_layers = -1; 8069 used_xadj = NULL; 8070 used_adjncy = NULL; 8071 } 8072 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8073 } 8074 } 8075 8076 /* setup sub_schurs data */ 8077 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8078 if (!sub_schurs->schur_explicit) { 8079 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8080 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8081 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8082 } else { 8083 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8084 PetscBool isseqaij,need_change = PETSC_FALSE; 8085 PetscInt benign_n; 8086 Mat change = NULL; 8087 Vec scaling = NULL; 8088 IS change_primal = NULL; 8089 8090 if (!pcbddc->use_vertices && reuse_solvers) { 8091 PetscInt n_vertices; 8092 8093 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8094 reuse_solvers = (PetscBool)!n_vertices; 8095 } 8096 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8097 if (!isseqaij) { 8098 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8099 if (matis->A == pcbddc->local_mat) { 8100 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8101 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8102 } else { 8103 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8104 } 8105 } 8106 if (!pcbddc->benign_change_explicit) { 8107 benign_n = pcbddc->benign_n; 8108 } else { 8109 benign_n = 0; 8110 } 8111 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8112 We need a global reduction to avoid possible deadlocks. 8113 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8114 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8115 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8116 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8117 need_change = (PetscBool)(!need_change); 8118 } 8119 /* If the user defines additional constraints, we import them here. 8120 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8121 if (need_change) { 8122 PC_IS *pcisf; 8123 PC_BDDC *pcbddcf; 8124 PC pcf; 8125 8126 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8127 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8128 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8129 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8130 /* hacks */ 8131 pcisf = (PC_IS*)pcf->data; 8132 pcisf->is_B_local = pcis->is_B_local; 8133 pcisf->vec1_N = pcis->vec1_N; 8134 pcisf->BtoNmap = pcis->BtoNmap; 8135 pcisf->n = pcis->n; 8136 pcisf->n_B = pcis->n_B; 8137 pcbddcf = (PC_BDDC*)pcf->data; 8138 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8139 pcbddcf->mat_graph = pcbddc->mat_graph; 8140 pcbddcf->use_faces = PETSC_TRUE; 8141 pcbddcf->use_change_of_basis = PETSC_TRUE; 8142 pcbddcf->use_change_on_faces = PETSC_TRUE; 8143 pcbddcf->use_qr_single = PETSC_TRUE; 8144 pcbddcf->fake_change = PETSC_TRUE; 8145 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8146 /* store information on primal vertices and change of basis (in local numbering) */ 8147 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8148 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8149 change = pcbddcf->ConstraintMatrix; 8150 pcbddcf->ConstraintMatrix = NULL; 8151 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8152 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8153 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8154 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8155 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8156 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8157 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8158 pcf->ops->destroy = NULL; 8159 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8160 } 8161 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8162 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8163 ierr = MatDestroy(&change);CHKERRQ(ierr); 8164 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8165 } 8166 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8167 8168 /* free adjacency */ 8169 if (free_used_adj) { 8170 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8171 } 8172 PetscFunctionReturn(0); 8173 } 8174 8175 #undef __FUNCT__ 8176 #define __FUNCT__ "PCBDDCInitSubSchurs" 8177 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8178 { 8179 PC_IS *pcis=(PC_IS*)pc->data; 8180 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8181 PCBDDCGraph graph; 8182 PetscErrorCode ierr; 8183 8184 PetscFunctionBegin; 8185 /* attach interface graph for determining subsets */ 8186 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8187 IS verticesIS,verticescomm; 8188 PetscInt vsize,*idxs; 8189 8190 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8191 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8192 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8193 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8194 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8195 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8196 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8197 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8198 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8199 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8200 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8201 } else { 8202 graph = pcbddc->mat_graph; 8203 } 8204 /* print some info */ 8205 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8206 IS vertices; 8207 PetscInt nv,nedges,nfaces; 8208 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8209 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8210 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8211 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8212 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8213 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8214 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8215 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8216 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8217 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8218 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8219 } 8220 8221 /* sub_schurs init */ 8222 if (!pcbddc->sub_schurs) { 8223 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8224 } 8225 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8226 8227 /* free graph struct */ 8228 if (pcbddc->sub_schurs_rebuild) { 8229 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8230 } 8231 PetscFunctionReturn(0); 8232 } 8233 8234 #undef __FUNCT__ 8235 #define __FUNCT__ "PCBDDCCheckOperator" 8236 PetscErrorCode PCBDDCCheckOperator(PC pc) 8237 { 8238 PC_IS *pcis=(PC_IS*)pc->data; 8239 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8240 PetscErrorCode ierr; 8241 8242 PetscFunctionBegin; 8243 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8244 IS zerodiag = NULL; 8245 Mat S_j,B0_B=NULL; 8246 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8247 PetscScalar *p0_check,*array,*array2; 8248 PetscReal norm; 8249 PetscInt i; 8250 8251 /* B0 and B0_B */ 8252 if (zerodiag) { 8253 IS dummy; 8254 8255 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8256 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8257 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8258 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8259 } 8260 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8261 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8262 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8263 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8264 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8265 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8266 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8267 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8268 /* S_j */ 8269 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8270 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8271 8272 /* mimic vector in \widetilde{W}_\Gamma */ 8273 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8274 /* continuous in primal space */ 8275 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8276 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8277 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8278 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8279 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8280 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8281 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8282 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8283 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8284 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8285 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8286 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8287 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8288 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8289 8290 /* assemble rhs for coarse problem */ 8291 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8292 /* local with Schur */ 8293 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8294 if (zerodiag) { 8295 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8296 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8297 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8298 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8299 } 8300 /* sum on primal nodes the local contributions */ 8301 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8302 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8303 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8304 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8305 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8306 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8307 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8308 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8309 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8310 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8311 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8312 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8313 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8314 /* scale primal nodes (BDDC sums contibutions) */ 8315 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8316 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8317 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8318 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8319 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8320 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8321 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8322 /* global: \widetilde{B0}_B w_\Gamma */ 8323 if (zerodiag) { 8324 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8325 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8326 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8327 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8328 } 8329 /* BDDC */ 8330 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8331 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8332 8333 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8334 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8335 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8336 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8337 for (i=0;i<pcbddc->benign_n;i++) { 8338 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8339 } 8340 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8341 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8342 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8343 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8344 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8345 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8346 } 8347 PetscFunctionReturn(0); 8348 } 8349 8350 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8351 #undef __FUNCT__ 8352 #define __FUNCT__ "MatMPIAIJRestrict" 8353 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8354 { 8355 Mat At; 8356 IS rows; 8357 PetscInt rst,ren; 8358 PetscErrorCode ierr; 8359 PetscLayout rmap; 8360 8361 PetscFunctionBegin; 8362 rst = ren = 0; 8363 if (ccomm != MPI_COMM_NULL) { 8364 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8365 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8366 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8367 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8368 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8369 } 8370 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8371 ierr = MatGetSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8372 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8373 8374 if (ccomm != MPI_COMM_NULL) { 8375 Mat_MPIAIJ *a,*b; 8376 IS from,to; 8377 Vec gvec; 8378 PetscInt lsize; 8379 8380 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8381 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8382 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8383 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8384 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8385 a = (Mat_MPIAIJ*)At->data; 8386 b = (Mat_MPIAIJ*)(*B)->data; 8387 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8388 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8389 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8390 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8391 b->A = a->A; 8392 b->B = a->B; 8393 8394 b->donotstash = a->donotstash; 8395 b->roworiented = a->roworiented; 8396 b->rowindices = 0; 8397 b->rowvalues = 0; 8398 b->getrowactive = PETSC_FALSE; 8399 8400 (*B)->rmap = rmap; 8401 (*B)->factortype = A->factortype; 8402 (*B)->assembled = PETSC_TRUE; 8403 (*B)->insertmode = NOT_SET_VALUES; 8404 (*B)->preallocated = PETSC_TRUE; 8405 8406 if (a->colmap) { 8407 #if defined(PETSC_USE_CTABLE) 8408 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8409 #else 8410 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8411 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8412 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8413 #endif 8414 } else b->colmap = 0; 8415 if (a->garray) { 8416 PetscInt len; 8417 len = a->B->cmap->n; 8418 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8419 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8420 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8421 } else b->garray = 0; 8422 8423 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8424 b->lvec = a->lvec; 8425 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8426 8427 /* cannot use VecScatterCopy */ 8428 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8429 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8430 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8431 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8432 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8433 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8434 ierr = ISDestroy(&from);CHKERRQ(ierr); 8435 ierr = ISDestroy(&to);CHKERRQ(ierr); 8436 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8437 } 8438 ierr = MatDestroy(&At);CHKERRQ(ierr); 8439 PetscFunctionReturn(0); 8440 } 8441