1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 8 9 /* returns B s.t. range(B) _|_ range(A) */ 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatDense_OrthogonalComplement" 12 PetscErrorCode MatDense_OrthogonalComplement(Mat A, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 13 { 14 #if !defined(PETSC_USE_COMPLEX) 15 PetscScalar *uwork,*data,*U, ds = 0.; 16 PetscReal *sing; 17 PetscBLASInt bM,bN,lwork,lierr,di = 1; 18 PetscInt ulw,i,nr,nc,n; 19 PetscErrorCode ierr; 20 21 PetscFunctionBegin; 22 #if defined(PETSC_MISSING_LAPACK_GESVD) 23 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 24 #endif 25 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 26 if (!nr || !nc) PetscFunctionReturn(0); 27 28 /* workspace */ 29 if (!work) { 30 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 31 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 32 } else { 33 ulw = lw; 34 uwork = work; 35 } 36 n = PetscMin(nr,nc); 37 if (!rwork) { 38 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 39 } else { 40 sing = rwork; 41 } 42 43 /* SVD */ 44 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 46 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 47 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 48 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 49 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 50 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 51 ierr = PetscFPTrapPop();CHKERRQ(ierr); 52 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 53 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 54 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 55 if (!rwork) { 56 ierr = PetscFree(sing);CHKERRQ(ierr); 57 } 58 if (!work) { 59 ierr = PetscFree(uwork);CHKERRQ(ierr); 60 } 61 /* create B */ 62 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 63 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 64 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 65 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 66 ierr = PetscFree(U);CHKERRQ(ierr); 67 #else 68 PetscFunctionBegin; 69 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 70 #endif 71 PetscFunctionReturn(0); 72 } 73 74 /* TODO REMOVE */ 75 #if defined(PRINT_GDET) 76 static int inc = 0; 77 static int lev = 0; 78 #endif 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "PCBDDCComputeNedelecChangeEdge" 82 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 83 { 84 PetscErrorCode ierr; 85 Mat GE,GEd; 86 PetscInt rsize,csize,esize; 87 PetscScalar *ptr; 88 89 PetscFunctionBegin; 90 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 91 if (!esize) PetscFunctionReturn(0); 92 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 93 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 94 95 /* gradients */ 96 ptr = work + 5*esize; 97 ierr = MatGetSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 98 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 99 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 100 ierr = MatDestroy(&GE);CHKERRQ(ierr); 101 102 /* constants */ 103 ptr += rsize*csize; 104 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 105 ierr = MatGetSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 107 ierr = MatDestroy(&GE);CHKERRQ(ierr); 108 ierr = MatDense_OrthogonalComplement(GEd,5*esize,work,rwork,GKins);CHKERRQ(ierr); 109 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 110 111 if (corners) { 112 Mat GEc; 113 PetscScalar *vals,v; 114 115 ierr = MatGetSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 116 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 117 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 118 /* TODO fix me */ 119 v = PetscAbsScalar(vals[0]); 120 v = 1.; 121 cvals[0] = vals[0]/v; 122 cvals[1] = vals[1]/v; 123 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 124 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 125 #if defined(PRINT_GDET) 126 { 127 PetscViewer viewer; 128 char filename[256]; 129 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 130 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 131 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 132 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 133 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 134 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 135 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 136 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 137 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 138 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 139 } 140 #endif 141 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 142 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 143 } 144 145 PetscFunctionReturn(0); 146 } 147 148 #undef __FUNCT__ 149 #define __FUNCT__ "PCBDDCNedelecSupport" 150 PetscErrorCode PCBDDCNedelecSupport(PC pc) 151 { 152 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 153 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 154 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 155 MatNullSpace nnsp; 156 Vec tvec,*quads; 157 PetscSF sfv; 158 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 159 MPI_Comm comm; 160 IS lned,primals,allprimals,nedfieldlocal; 161 IS *eedges,*extrows,*extcols,*alleedges; 162 PetscBT btv,bte,btvc,btb,btvcand,btvi,btee,bter; 163 PetscScalar *vals,*work; 164 PetscReal *rwork; 165 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 166 PetscInt ne,nv,Lv,order,n,field; 167 PetscInt n_neigh,*neigh,*n_shared,**shared; 168 PetscInt i,j,extmem,cum,maxsize,nee,nquads=2; 169 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 170 PetscInt *sfvleaves,*sfvroots; 171 PetscInt *corners,*cedges; 172 #if defined(PETSC_USE_DEBUG) 173 PetscInt *emarks; 174 #endif 175 PetscBool print,eerr,done,lrc[2],conforming,global; 176 PetscErrorCode ierr; 177 178 PetscFunctionBegin; 179 /* test variable order code and print debug info TODO: to be removed */ 180 print = PETSC_FALSE; 181 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_print_nedelec",&print,NULL);CHKERRQ(ierr); 182 ierr = PetscOptionsGetInt(NULL,NULL,"-pc_bddc_nedelec_order",&pcbddc->nedorder,NULL);CHKERRQ(ierr); 183 184 /* Return to caller if there are no edges in the decomposition */ 185 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 186 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 187 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 188 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 189 lrc[0] = PETSC_FALSE; 190 for (i=0;i<n;i++) { 191 if (PetscRealPart(vals[i]) > 2.) { 192 lrc[0] = PETSC_TRUE; 193 break; 194 } 195 } 196 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 197 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 198 if (!lrc[1]) PetscFunctionReturn(0); 199 200 /* If the discrete gradient is defined for a subset of dofs and global is true, 201 it assumes G is given in global ordering for all the dofs. 202 Otherwise, the ordering is global for the Nedelec field */ 203 order = pcbddc->nedorder; 204 conforming = pcbddc->conforming; 205 field = pcbddc->nedfield; 206 global = pcbddc->nedglobal; 207 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 208 if (pcbddc->n_ISForDofsLocal && field > -1) { 209 PetscBool setprimal = PETSC_FALSE; 210 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_nedelec_field_primal",&setprimal,NULL);CHKERRQ(ierr); 211 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 212 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 213 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 214 if (setprimal) { 215 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,nedfieldlocal);CHKERRQ(ierr); 216 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 217 PetscFunctionReturn(0); 218 } 219 } else if (!pcbddc->n_ISForDofsLocal) { 220 PetscBool testnedfield = PETSC_FALSE; 221 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_nedelec_field",&testnedfield,NULL);CHKERRQ(ierr); 222 if (!testnedfield) { 223 ne = n; 224 nedfieldlocal = NULL; 225 } else { 226 /* ierr = ISCreateStride(comm,n,0,1,&nedfieldlocal);CHKERRQ(ierr); */ 227 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 228 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 229 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 230 for (i=0;i<n;i++) matis->sf_leafdata[i] = 1; 231 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 232 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 233 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 234 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 235 for (i=0,cum=0;i<n;i++) { 236 if (matis->sf_leafdata[i] > 1) { 237 matis->sf_leafdata[cum++] = i; 238 } 239 } 240 ierr = ISCreateGeneral(comm,cum,matis->sf_leafdata,PETSC_COPY_VALUES,&nedfieldlocal);CHKERRQ(ierr); 241 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 242 } 243 global = PETSC_TRUE; 244 } else { 245 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 246 } 247 248 if (nedfieldlocal) { /* merge with previous code when testing is done */ 249 IS is; 250 251 /* need to map from the local Nedelec field to local numbering */ 252 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 253 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 254 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 255 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 256 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 257 if (global) { 258 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 259 el2g = al2g; 260 } else { 261 IS gis; 262 263 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 264 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 265 ierr = ISDestroy(&gis);CHKERRQ(ierr); 266 } 267 ierr = ISDestroy(&is);CHKERRQ(ierr); 268 } else { 269 /* restore default */ 270 pcbddc->nedfield = -1; 271 /* one ref for the destruction of al2g, one for el2g */ 272 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 273 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 274 el2g = al2g; 275 fl2g = NULL; 276 } 277 278 /* Sanity checks */ 279 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 280 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 281 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 282 283 /* Drop connections for interior edges */ 284 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 285 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 286 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 287 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 288 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 289 if (nedfieldlocal) { 290 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 291 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 292 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 293 } else { 294 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 295 } 296 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 297 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 298 if (global) { 299 PetscInt rst; 300 301 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 302 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 303 if (matis->sf_rootdata[i] < 2) { 304 matis->sf_rootdata[cum++] = i + rst; 305 } 306 } 307 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 308 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 309 } else { 310 PetscInt *tbz; 311 312 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 313 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 314 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 315 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 316 for (i=0,cum=0;i<ne;i++) 317 if (matis->sf_leafdata[idxs[i]] == 1) 318 tbz[cum++] = i; 319 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 320 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 321 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 322 ierr = PetscFree(tbz);CHKERRQ(ierr); 323 } 324 325 /* Extract subdomain relevant rows of G */ 326 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 327 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 328 ierr = MatGetSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 329 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 330 ierr = ISDestroy(&lned);CHKERRQ(ierr); 331 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 332 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 333 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 334 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 335 if (print) { 336 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 337 ierr = MatView(lG,NULL);CHKERRQ(ierr); 338 } 339 340 /* SF for nodal communications */ 341 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 342 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 343 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 344 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 345 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 346 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 347 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 348 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 349 ierr = PetscMalloc2(nv,&sfvleaves,Lv,&sfvroots);CHKERRQ(ierr); 350 351 /* Destroy temporary G created in MATIS format and modified G */ 352 ierr = MatDestroy(&G);CHKERRQ(ierr); 353 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 354 355 /* Save lG */ 356 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 357 358 /* Analyze the edge-nodes connections (duplicate lG) */ 359 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 360 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 361 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 362 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 363 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 364 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 365 /* need to import the boundary specification to ensure the 366 proper detection of coarse edges' endpoints */ 367 if (pcbddc->DirichletBoundariesLocal) { 368 IS is; 369 370 if (fl2g) { 371 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 372 } else { 373 is = pcbddc->DirichletBoundariesLocal; 374 } 375 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 376 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 377 for (i=0;i<cum;i++) { 378 if (idxs[i] >= 0) { 379 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 380 } 381 } 382 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 383 if (fl2g) { 384 ierr = ISDestroy(&is);CHKERRQ(ierr); 385 } 386 } 387 if (pcbddc->NeumannBoundariesLocal) { 388 IS is; 389 390 if (fl2g) { 391 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 392 } else { 393 is = pcbddc->NeumannBoundariesLocal; 394 } 395 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 396 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 397 for (i=0;i<cum;i++) { 398 if (idxs[i] >= 0) { 399 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 400 } 401 } 402 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 403 if (fl2g) { 404 ierr = ISDestroy(&is);CHKERRQ(ierr); 405 } 406 } 407 408 /* need to remove coarse faces' dofs to ensure the 409 proper detection of coarse edges' endpoints */ 410 ierr = PetscCalloc1(ne,&marks);CHKERRQ(ierr); 411 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 412 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 413 for (i=1;i<n_neigh;i++) 414 for (j=0;j<n_shared[i];j++) 415 marks[shared[i][j]]++; 416 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 417 for (i=0;i<ne;i++) { 418 if (marks[i] > 1 || (marks[i] == 1 && PetscBTLookup(btb,i))) { 419 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 420 } 421 } 422 423 if (!conforming) { 424 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 425 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 426 } 427 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 428 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 429 cum = 0; 430 for (i=0;i<ne;i++) { 431 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 432 if (!PetscBTLookup(btee,i)) { 433 marks[cum++] = i; 434 continue; 435 } 436 /* set badly connected edge dofs as primal */ 437 if (!conforming) { 438 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 439 marks[cum++] = i; 440 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 441 for (j=ii[i];j<ii[i+1];j++) { 442 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 443 } 444 } else { 445 /* every edge dofs should be connected trough a certain number of nodal dofs 446 to other edge dofs belonging to coarse edges 447 - at most 2 endpoints 448 - order-1 interior nodal dofs 449 - no undefined nodal dofs (nconn < order) 450 */ 451 PetscInt ends = 0,ints = 0, undef = 0; 452 for (j=ii[i];j<ii[i+1];j++) { 453 PetscInt v = jj[j],k; 454 PetscInt nconn = iit[v+1]-iit[v]; 455 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 456 if (nconn > order) ends++; 457 else if (nconn == order) ints++; 458 else undef++; 459 } 460 if (undef || ends > 2 || ints != order -1) { 461 marks[cum++] = i; 462 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 463 for (j=ii[i];j<ii[i+1];j++) { 464 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 465 } 466 } 467 } 468 } 469 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 470 if (!order && ii[i+1] != ii[i]) { 471 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 472 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 473 } 474 } 475 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 476 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 477 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 478 if (!conforming) { 479 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 480 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 481 } 482 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 483 /* identify splitpoints and corner candidates */ 484 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 485 if (print) { 486 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 487 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 488 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 489 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 490 } 491 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 492 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 493 for (i=0;i<nv;i++) { 494 PetscInt ord = order, test = ii[i+1]-ii[i]; 495 if (!order) { /* variable order */ 496 PetscReal vorder = 0.; 497 498 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 499 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 500 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 501 ord = 1; 502 } 503 #if defined(PETSC_USE_DEBUG) 504 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 505 #endif 506 if (test >= 3*ord) { /* splitpoints */ 507 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d\n",i); 508 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 509 } else if (test == ord) { 510 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 511 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 512 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 513 } else { 514 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 515 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 516 } 517 } 518 } 519 520 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 521 if (order != 1) { 522 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 523 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 524 for (i=0;i<nv;i++) { 525 if (PetscBTLookup(btvcand,i)) { 526 PetscBool found = PETSC_FALSE; 527 for (j=ii[i];j<ii[i+1] && !found;j++) { 528 PetscInt k,e = jj[j]; 529 if (PetscBTLookup(bte,e)) continue; 530 for (k=iit[e];k<iit[e+1];k++) { 531 PetscInt v = jjt[k]; 532 if (v != i && PetscBTLookup(btvcand,v)) { 533 found = PETSC_TRUE; 534 break; 535 } 536 } 537 } 538 if (!found) { 539 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 540 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 541 } else { 542 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 543 } 544 } 545 } 546 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 547 } 548 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 549 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 550 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 551 552 /* Get the local G^T explicitly */ 553 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 554 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 555 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 556 557 /* Mark interior nodal dofs */ 558 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 559 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 560 for (i=1;i<n_neigh;i++) { 561 for (j=0;j<n_shared[i];j++) { 562 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 563 } 564 } 565 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 566 567 /* communicate corners and splitpoints */ 568 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 569 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 570 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 571 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 572 573 if (print) { 574 IS tbz; 575 576 cum = 0; 577 for (i=0;i<nv;i++) 578 if (sfvleaves[i]) 579 vmarks[cum++] = i; 580 581 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 582 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 583 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 584 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 585 } 586 587 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 588 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 589 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 590 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 591 592 /* Zero rows of lGt corresponding to identified corners 593 and interior nodal dofs */ 594 cum = 0; 595 for (i=0;i<nv;i++) { 596 if (sfvleaves[i]) { 597 vmarks[cum++] = i; 598 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 599 } 600 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 601 } 602 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 603 if (print) { 604 IS tbz; 605 606 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 607 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 608 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 609 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 610 } 611 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 612 ierr = PetscFree(vmarks);CHKERRQ(ierr); 613 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 614 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 615 616 /* Recompute G */ 617 ierr = MatDestroy(&lG);CHKERRQ(ierr); 618 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 619 if (print) { 620 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 621 ierr = MatView(lG,NULL);CHKERRQ(ierr); 622 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 623 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 624 } 625 626 /* Get primal dofs (if any) */ 627 cum = 0; 628 for (i=0;i<ne;i++) { 629 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 630 } 631 if (fl2g) { 632 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 633 } 634 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 635 if (print) { 636 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 637 ierr = ISView(primals,NULL);CHKERRQ(ierr); 638 } 639 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 640 /* TODO: what if the user passed in some of them ? */ 641 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 642 ierr = ISDestroy(&primals);CHKERRQ(ierr); 643 644 /* Compute edge connectivity */ 645 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 646 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 647 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 648 if (fl2g) { 649 PetscBT btf; 650 PetscInt *iia,*jja,*iiu,*jju; 651 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 652 653 /* create CSR for all local dofs */ 654 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 655 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 656 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 657 iiu = pcbddc->mat_graph->xadj; 658 jju = pcbddc->mat_graph->adjncy; 659 } else if (pcbddc->use_local_adj) { 660 rest = PETSC_TRUE; 661 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 662 } else { 663 free = PETSC_TRUE; 664 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 665 iiu[0] = 0; 666 for (i=0;i<n;i++) { 667 iiu[i+1] = i+1; 668 jju[i] = -1; 669 } 670 } 671 672 /* import sizes of CSR */ 673 iia[0] = 0; 674 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 675 676 /* overwrite entries corresponding to the Nedelec field */ 677 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 678 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 679 for (i=0;i<ne;i++) { 680 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 681 iia[idxs[i]+1] = ii[i+1]-ii[i]; 682 } 683 684 /* iia in CSR */ 685 for (i=0;i<n;i++) iia[i+1] += iia[i]; 686 687 /* jja in CSR */ 688 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 689 for (i=0;i<n;i++) 690 if (!PetscBTLookup(btf,i)) 691 for (j=0;j<iiu[i+1]-iiu[i];j++) 692 jja[iia[i]+j] = jju[iiu[i]+j]; 693 694 /* map edge dofs connectivity */ 695 if (jj) { 696 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 697 for (i=0;i<ne;i++) { 698 PetscInt e = idxs[i]; 699 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 700 } 701 } 702 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 703 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 704 if (rest) { 705 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 706 } 707 if (free) { 708 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 709 } 710 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 711 } else { 712 if (jj) { 713 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 714 } 715 } 716 717 /* Analyze interface for edge dofs */ 718 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 719 720 /* Get coarse edges in the edge space */ 721 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 722 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 723 724 if (fl2g) { 725 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 726 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 727 for (i=0;i<nee;i++) { 728 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 729 } 730 } else { 731 eedges = alleedges; 732 primals = allprimals; 733 } 734 735 /* Mark fine edge dofs with their coarse edge id */ 736 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 737 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 738 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 739 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 740 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 741 if (print) { 742 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 743 ierr = ISView(primals,NULL);CHKERRQ(ierr); 744 } 745 746 maxsize = 0; 747 for (i=0;i<nee;i++) { 748 PetscInt size,mark = i+1; 749 750 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 751 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 752 for (j=0;j<size;j++) marks[idxs[j]] = mark; 753 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 754 maxsize = PetscMax(maxsize,size); 755 } 756 757 /* Find coarse edge endpoints */ 758 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 759 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 760 for (i=0;i<nee;i++) { 761 PetscInt mark = i+1,size; 762 763 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 764 if (!size && nedfieldlocal) continue; 765 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 766 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 767 if (print) { 768 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 769 ISView(eedges[i],NULL); 770 } 771 for (j=0;j<size;j++) { 772 PetscInt k, ee = idxs[j]; 773 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 774 for (k=ii[ee];k<ii[ee+1];k++) { 775 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 776 if (PetscBTLookup(btv,jj[k])) { 777 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 778 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 779 PetscInt k2; 780 PetscBool corner = PETSC_FALSE; 781 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 782 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 783 /* it's a corner if either is connected with an edge dof belonging to a different cc or 784 if the edge dof lie on the natural part of the boundary */ 785 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 786 corner = PETSC_TRUE; 787 break; 788 } 789 } 790 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 791 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 792 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 793 } else { 794 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 795 } 796 } 797 } 798 } 799 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 800 } 801 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 802 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 803 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 804 805 /* Reset marked primal dofs */ 806 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 807 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 808 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 809 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 810 811 /* Now use the initial lG */ 812 ierr = MatDestroy(&lG);CHKERRQ(ierr); 813 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 814 lG = lGinit; 815 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 816 817 /* Compute extended cols indices */ 818 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 819 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 820 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 821 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 822 i *= maxsize; 823 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 824 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 825 eerr = PETSC_FALSE; 826 for (i=0;i<nee;i++) { 827 PetscInt size,found = 0; 828 829 cum = 0; 830 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 831 if (!size && nedfieldlocal) continue; 832 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 833 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 834 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 835 for (j=0;j<size;j++) { 836 PetscInt k,ee = idxs[j]; 837 for (k=ii[ee];k<ii[ee+1];k++) { 838 PetscInt vv = jj[k]; 839 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 840 else if (!PetscBTLookupSet(btvc,vv)) found++; 841 } 842 } 843 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 844 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 845 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 846 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 847 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 848 /* it may happen that endpoints are not defined at this point 849 if it is the case, mark this edge for a second pass */ 850 if (cum != size -1 || found != 2) { 851 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 852 if (print) { 853 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 854 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 855 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 856 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 857 } 858 eerr = PETSC_TRUE; 859 } 860 } 861 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 862 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 863 if (done) { 864 PetscInt *newprimals; 865 866 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 867 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 868 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 869 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 870 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 871 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 872 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 873 for (i=0;i<nee;i++) { 874 PetscBool has_candidates = PETSC_FALSE; 875 if (PetscBTLookup(bter,i)) { 876 PetscInt size,mark = i+1; 877 878 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 879 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 880 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 881 for (j=0;j<size;j++) { 882 PetscInt k,ee = idxs[j]; 883 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 884 for (k=ii[ee];k<ii[ee+1];k++) { 885 /* set all candidates located on the edge as corners */ 886 if (PetscBTLookup(btvcand,jj[k])) { 887 PetscInt k2,vv = jj[k]; 888 has_candidates = PETSC_TRUE; 889 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 890 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 891 /* set all edge dofs connected to candidate as primals */ 892 for (k2=iit[vv];k2<iit[vv+1];k2++) { 893 if (marks[jjt[k2]] == mark) { 894 PetscInt k3,ee2 = jjt[k2]; 895 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 896 newprimals[cum++] = ee2; 897 /* finally set the new corners */ 898 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 899 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 900 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 901 } 902 } 903 } 904 } else { 905 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 906 } 907 } 908 } 909 if (!has_candidates) { /* circular edge */ 910 PetscInt k, ee = idxs[0],*tmarks; 911 912 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 913 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 914 for (k=ii[ee];k<ii[ee+1];k++) { 915 PetscInt k2; 916 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 917 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 918 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 919 } 920 for (j=0;j<size;j++) { 921 if (tmarks[idxs[j]] > 1) { 922 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 923 newprimals[cum++] = idxs[j]; 924 } 925 } 926 ierr = PetscFree(tmarks);CHKERRQ(ierr); 927 } 928 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 929 } 930 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 931 } 932 ierr = PetscFree(extcols);CHKERRQ(ierr); 933 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 934 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 935 if (fl2g) { 936 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 937 ierr = ISDestroy(&primals);CHKERRQ(ierr); 938 for (i=0;i<nee;i++) { 939 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 940 } 941 ierr = PetscFree(eedges);CHKERRQ(ierr); 942 } 943 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 944 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 945 ierr = PetscFree(newprimals);CHKERRQ(ierr); 946 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 947 ierr = ISDestroy(&primals);CHKERRQ(ierr); 948 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 949 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 950 if (fl2g) { 951 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 952 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 953 for (i=0;i<nee;i++) { 954 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 955 } 956 } else { 957 eedges = alleedges; 958 primals = allprimals; 959 } 960 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 961 962 /* Mark again */ 963 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 964 for (i=0;i<nee;i++) { 965 PetscInt size,mark = i+1; 966 967 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 968 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 969 for (j=0;j<size;j++) marks[idxs[j]] = mark; 970 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 971 } 972 if (print) { 973 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 974 ierr = ISView(primals,NULL);CHKERRQ(ierr); 975 } 976 977 /* Recompute extended cols */ 978 eerr = PETSC_FALSE; 979 for (i=0;i<nee;i++) { 980 PetscInt size; 981 982 cum = 0; 983 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 984 if (!size && nedfieldlocal) continue; 985 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 986 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 987 for (j=0;j<size;j++) { 988 PetscInt k,ee = idxs[j]; 989 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 990 } 991 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 992 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 993 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 994 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 995 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 996 if (cum != size -1) { 997 if (print) { 998 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 999 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1000 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1001 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1002 } 1003 eerr = PETSC_TRUE; 1004 } 1005 } 1006 } 1007 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1008 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1009 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1010 /* an error should not occur at this point */ 1011 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1012 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1013 1014 /* Check the number of endpoints */ 1015 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1016 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1017 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1018 for (i=0;i<nee;i++) { 1019 PetscInt size, found = 0, gc[2]; 1020 1021 /* init with defaults */ 1022 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1023 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1024 if (!size && nedfieldlocal) continue; 1025 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1026 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1027 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1028 for (j=0;j<size;j++) { 1029 PetscInt k,ee = idxs[j]; 1030 for (k=ii[ee];k<ii[ee+1];k++) { 1031 PetscInt vv = jj[k]; 1032 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1033 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1034 corners[i*2+found++] = vv; 1035 } 1036 } 1037 } 1038 if (found != 2) { 1039 PetscInt e; 1040 if (fl2g) { 1041 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1042 } else { 1043 e = idxs[0]; 1044 } 1045 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1046 } 1047 /* WARNING : this depends on how pcbddc->primal_indices_local_idxs is filled up in PCBDDConstraintsSetUp */ 1048 cedges[i] = idxs[0]; 1049 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1050 if (gc[0] > gc[1]) { 1051 PetscInt swap = corners[2*i]; 1052 corners[2*i] = corners[2*i+1]; 1053 corners[2*i+1] = swap; 1054 } 1055 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1056 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1057 } 1058 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1059 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1060 1061 #if defined(PETSC_USE_DEBUG) 1062 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1063 not interfere with neighbouring coarse edges */ 1064 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1065 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1066 for (i=0;i<nv;i++) { 1067 PetscInt emax = 0,eemax = 0; 1068 1069 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1070 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1071 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1072 for (j=1;j<nee+1;j++) { 1073 if (emax < emarks[j]) { 1074 emax = emarks[j]; 1075 eemax = j; 1076 } 1077 } 1078 /* not relevant for edges */ 1079 if (!eemax) continue; 1080 1081 for (j=ii[i];j<ii[i+1];j++) { 1082 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1083 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1084 } 1085 } 1086 } 1087 ierr = PetscFree(emarks);CHKERRQ(ierr); 1088 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1089 #endif 1090 1091 /* Compute extended rows indices for edge blocks of the change of basis */ 1092 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1093 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1094 extmem *= maxsize; 1095 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1096 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1097 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1098 for (i=0;i<nv;i++) { 1099 PetscInt mark = 0,size,start; 1100 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1101 for (j=ii[i];j<ii[i+1];j++) 1102 if (marks[jj[j]] && !mark) 1103 mark = marks[jj[j]]; 1104 1105 /* not relevant */ 1106 if (!mark) continue; 1107 1108 /* import extended row */ 1109 mark--; 1110 start = mark*extmem+extrowcum[mark]; 1111 size = ii[i+1]-ii[i]; 1112 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1113 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1114 extrowcum[mark] += size; 1115 } 1116 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1117 cum = 0; 1118 for (i=0;i<nee;i++) { 1119 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1120 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1121 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1122 cum = PetscMax(cum,size); 1123 } 1124 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1125 ierr = PetscFree(marks);CHKERRQ(ierr); 1126 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1127 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1128 1129 /* Workspace for lapack inner calls and VecSetValues */ 1130 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1131 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1132 for (i=0;i<maxsize;i++) vals[i] = 1.; 1133 1134 /* Create vectors for quadrature rules */ 1135 /* TODO preserve other quadratures */ 1136 ierr = PetscMalloc1(nquads,&quads);CHKERRQ(ierr); 1137 for (i=0;i<nquads;i++) { 1138 ierr = MatCreateVecs(pc->pmat,&quads[i],NULL);CHKERRQ(ierr); 1139 ierr = VecSetLocalToGlobalMapping(quads[i],al2g);CHKERRQ(ierr); 1140 } 1141 ierr = PCBDDCNullSpaceCreate(comm,PETSC_FALSE,nquads,quads,&nnsp);CHKERRQ(ierr); 1142 1143 /* Create change of basis matrix (preallocation can be improved) */ 1144 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1145 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1146 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1147 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1148 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1149 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1150 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1151 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1152 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1153 ierr = MatSetOption(T,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 1154 1155 /* Defaults to identity */ 1156 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1157 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1158 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1159 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1160 1161 /* Create discrete gradient for the coarser level if needed */ 1162 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1163 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1164 if (pcbddc->current_level < pcbddc->max_levels) { 1165 ISLocalToGlobalMapping cel2g,cvl2g; 1166 IS wis,gwis; 1167 PetscInt cnv,cne; 1168 1169 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1170 if (fl2g) { 1171 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1172 } else { 1173 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1174 pcbddc->nedclocal = wis; 1175 } 1176 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1177 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1178 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1179 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1180 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1181 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1182 1183 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1184 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1185 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1186 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1187 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1188 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1189 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1190 1191 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1192 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1193 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1194 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1195 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1196 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1197 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1198 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1199 } 1200 1201 #if defined(PRINT_GDET) 1202 inc = 0; 1203 lev = pcbddc->current_level; 1204 #endif 1205 for (i=0;i<nee;i++) { 1206 Mat Gins = NULL, GKins = NULL; 1207 IS cornersis = NULL; 1208 PetscScalar cvals[2]; 1209 1210 if (pcbddc->nedcG) { 1211 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1212 } 1213 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1214 if (Gins && GKins) { 1215 PetscScalar *data; 1216 const PetscInt *rows,*cols; 1217 PetscInt nrh,nch,nrc,ncc; 1218 1219 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1220 /* H1 */ 1221 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1222 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1223 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1224 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1225 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1226 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1227 /* complement */ 1228 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1229 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1230 if (ncc > nquads-1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet supported ncc %d nquads %d",ncc,nquads); 1231 if (ncc + nch != nrc) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d",ncc,nch,nrc); 1232 if (ncc != 1 && pcbddc->nedcG) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the dicrete gradient for the next level with ncc %d",ncc); 1233 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1234 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1235 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1236 /* Gins kernel quadratures */ 1237 for (j=0;j<ncc;j++) { 1238 ierr = VecSetValueLocal(quads[j],cols[nch+j],1.,INSERT_VALUES);CHKERRQ(ierr); 1239 } 1240 /* H1 average */ 1241 ierr = VecSetValuesLocal(quads[nquads-1],nch,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1242 1243 /* coarse discrete gradient */ 1244 if (pcbddc->nedcG) { 1245 PetscInt cols[2]; 1246 1247 cols[0] = 2*i; 1248 cols[1] = 2*i+1; 1249 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1250 } 1251 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1252 } 1253 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1254 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1255 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1256 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1257 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1258 } 1259 1260 /* Start assembling */ 1261 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1262 for (i=0;i<nquads;i++) { 1263 ierr = VecAssemblyBegin(quads[i]);CHKERRQ(ierr); 1264 } 1265 if (pcbddc->nedcG) { 1266 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1267 } 1268 1269 /* Free */ 1270 if (fl2g) { 1271 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1272 for (i=0;i<nee;i++) { 1273 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1274 } 1275 ierr = PetscFree(eedges);CHKERRQ(ierr); 1276 } 1277 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1278 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1279 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1280 ierr = PetscFree(extrow);CHKERRQ(ierr); 1281 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1282 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1283 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1284 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1285 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1286 ierr = PetscFree(vals);CHKERRQ(ierr); 1287 ierr = PetscFree(corners);CHKERRQ(ierr); 1288 ierr = PetscFree(cedges);CHKERRQ(ierr); 1289 ierr = PetscFree(extrows);CHKERRQ(ierr); 1290 ierr = PetscFree(extcols);CHKERRQ(ierr); 1291 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1292 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1293 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1294 1295 /* Complete assembling */ 1296 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1297 for (i=0;i<nquads;i++) { 1298 ierr = VecAssemblyEnd(quads[i]);CHKERRQ(ierr); 1299 } 1300 for (i=0;i<nquads;i++) { 1301 ierr = VecDestroy(&quads[i]);CHKERRQ(ierr); 1302 } 1303 ierr = PetscFree(quads);CHKERRQ(ierr); 1304 if (pcbddc->nedcG) { 1305 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1306 #if 0 1307 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1308 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1309 #endif 1310 } 1311 1312 /* set change of basis */ 1313 ierr = PCBDDCSetChangeOfBasisMat(pc,T,PETSC_FALSE);CHKERRQ(ierr); 1314 #if 0 1315 if (pcbddc->current_level) { 1316 PetscViewer viewer; 1317 char filename[256]; 1318 Mat Tned; 1319 IS sub; 1320 PetscInt rst; 1321 1322 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 1323 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 1324 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 1325 if (nedfieldlocal) { 1326 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 1327 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 1328 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 1329 } else { 1330 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 1331 } 1332 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 1333 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 1334 ierr = MatGetOwnershipRange(pc->pmat,&rst,NULL);CHKERRQ(ierr); 1335 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 1336 if (matis->sf_rootdata[i]) { 1337 matis->sf_rootdata[cum++] = i + rst; 1338 } 1339 } 1340 PetscPrintf(PETSC_COMM_SELF,"[%D] LEVEL %d MY ne %d cum %d\n",PetscGlobalRank,pcbddc->current_level,ne,cum); 1341 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cum,matis->sf_rootdata,PETSC_USE_POINTER,&sub);CHKERRQ(ierr); 1342 ierr = MatGetSubMatrix(T,sub,sub,MAT_INITIAL_MATRIX,&Tned);CHKERRQ(ierr); 1343 ierr = ISDestroy(&sub);CHKERRQ(ierr); 1344 1345 sprintf(filename,"Change_l%d.m",pcbddc->current_level); 1346 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)Tned),filename,&viewer);CHKERRQ(ierr); 1347 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1348 ierr = PetscObjectSetName((PetscObject)Tned,"T");CHKERRQ(ierr); 1349 ierr = MatView(Tned,viewer);CHKERRQ(ierr); 1350 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1351 ierr = MatDestroy(&Tned);CHKERRQ(ierr); 1352 } 1353 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1354 #endif 1355 ierr = MatDestroy(&T);CHKERRQ(ierr); 1356 1357 /* set quadratures */ 1358 ierr = MatSetNearNullSpace(pc->pmat,nnsp);CHKERRQ(ierr); 1359 ierr = MatNullSpaceDestroy(&nnsp);CHKERRQ(ierr); 1360 1361 PetscFunctionReturn(0); 1362 } 1363 1364 /* the near-null space of BDDC carries information on quadrature weights, 1365 and these can be collinear -> so cheat with MatNullSpaceCreate 1366 and create a suitable set of basis vectors first */ 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "PCBDDCNullSpaceCreate" 1369 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1370 { 1371 PetscErrorCode ierr; 1372 PetscInt i; 1373 1374 PetscFunctionBegin; 1375 for (i=0;i<nvecs;i++) { 1376 PetscInt first,last; 1377 1378 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1379 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1380 if (i>=first && i < last) { 1381 PetscScalar *data; 1382 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1383 if (!has_const) { 1384 data[i-first] = 1.; 1385 } else { 1386 data[2*i-first] = 1./PetscSqrtReal(2.); 1387 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1388 } 1389 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1390 } 1391 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1392 } 1393 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1394 for (i=0;i<nvecs;i++) { /* reset vectors */ 1395 PetscInt first,last; 1396 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1397 if (i>=first && i < last) { 1398 PetscScalar *data; 1399 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1400 if (!has_const) { 1401 data[i-first] = 0.; 1402 } else { 1403 data[2*i-first] = 0.; 1404 data[2*i-first+1] = 0.; 1405 } 1406 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1407 } 1408 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1409 } 1410 PetscFunctionReturn(0); 1411 } 1412 1413 #undef __FUNCT__ 1414 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 1415 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1416 { 1417 Mat loc_divudotp; 1418 Vec p,v,vins,quad_vec,*quad_vecs; 1419 ISLocalToGlobalMapping map; 1420 IS *faces,*edges; 1421 PetscScalar *vals; 1422 const PetscScalar *array; 1423 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1424 PetscMPIInt rank; 1425 PetscErrorCode ierr; 1426 1427 PetscFunctionBegin; 1428 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1429 if (graph->twodim) { 1430 lmaxneighs = 2; 1431 } else { 1432 lmaxneighs = 1; 1433 for (i=0;i<ne;i++) { 1434 const PetscInt *idxs; 1435 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1436 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1437 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1438 } 1439 lmaxneighs++; /* graph count does not include self */ 1440 } 1441 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1442 maxsize = 0; 1443 for (i=0;i<ne;i++) { 1444 PetscInt nn; 1445 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1446 maxsize = PetscMax(maxsize,nn); 1447 } 1448 for (i=0;i<nf;i++) { 1449 PetscInt nn; 1450 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1451 maxsize = PetscMax(maxsize,nn); 1452 } 1453 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1454 /* create vectors to hold quadrature weights */ 1455 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1456 if (!transpose) { 1457 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1458 } else { 1459 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1460 } 1461 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1462 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1463 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1464 for (i=0;i<maxneighs;i++) { 1465 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1466 } 1467 1468 /* compute local quad vec */ 1469 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1470 if (!transpose) { 1471 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1472 } else { 1473 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1474 } 1475 ierr = VecSet(p,1.);CHKERRQ(ierr); 1476 if (!transpose) { 1477 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1478 } else { 1479 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1480 } 1481 if (vl2l) { 1482 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1483 } else { 1484 vins = v; 1485 } 1486 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1487 ierr = VecDestroy(&p);CHKERRQ(ierr); 1488 1489 /* insert in global quadrature vecs */ 1490 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1491 for (i=0;i<nf;i++) { 1492 const PetscInt *idxs; 1493 PetscInt idx,nn,j; 1494 1495 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1496 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1497 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1498 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1499 idx = -(idx+1); 1500 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1501 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1502 } 1503 for (i=0;i<ne;i++) { 1504 const PetscInt *idxs; 1505 PetscInt idx,nn,j; 1506 1507 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1508 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1509 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1510 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1511 idx = -(idx+1); 1512 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1513 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1514 } 1515 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1516 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1517 if (vl2l) { 1518 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1519 } 1520 ierr = VecDestroy(&v);CHKERRQ(ierr); 1521 ierr = PetscFree(vals);CHKERRQ(ierr); 1522 1523 /* assemble near null space */ 1524 for (i=0;i<maxneighs;i++) { 1525 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1526 } 1527 for (i=0;i<maxneighs;i++) { 1528 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1529 } 1530 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1531 PetscFunctionReturn(0); 1532 } 1533 1534 1535 #undef __FUNCT__ 1536 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 1537 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1538 { 1539 PetscErrorCode ierr; 1540 Vec local,global; 1541 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1542 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1543 1544 PetscFunctionBegin; 1545 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1546 /* need to convert from global to local topology information and remove references to information in global ordering */ 1547 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1548 if (pcbddc->user_provided_isfordofs) { 1549 if (pcbddc->n_ISForDofs) { 1550 PetscInt i; 1551 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1552 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1553 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1554 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1555 } 1556 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1557 pcbddc->n_ISForDofs = 0; 1558 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1559 } 1560 } else { 1561 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1562 PetscInt i, n = matis->A->rmap->n; 1563 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1564 if (i > 1) { 1565 pcbddc->n_ISForDofsLocal = i; 1566 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1567 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1568 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1569 } 1570 } 1571 } 1572 } 1573 1574 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1575 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1576 } 1577 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1578 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1579 } 1580 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1581 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1582 } 1583 ierr = VecDestroy(&global);CHKERRQ(ierr); 1584 ierr = VecDestroy(&local);CHKERRQ(ierr); 1585 PetscFunctionReturn(0); 1586 } 1587 1588 #undef __FUNCT__ 1589 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 1590 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1591 { 1592 PC_IS *pcis = (PC_IS*)(pc->data); 1593 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1594 PetscErrorCode ierr; 1595 1596 PetscFunctionBegin; 1597 if (!pcbddc->benign_have_null) { 1598 PetscFunctionReturn(0); 1599 } 1600 if (pcbddc->ChangeOfBasisMatrix) { 1601 Vec swap; 1602 1603 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1604 swap = pcbddc->work_change; 1605 pcbddc->work_change = r; 1606 r = swap; 1607 } 1608 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1609 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1610 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1611 ierr = VecSet(z,0.);CHKERRQ(ierr); 1612 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1613 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1614 if (pcbddc->ChangeOfBasisMatrix) { 1615 pcbddc->work_change = r; 1616 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1617 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1618 } 1619 PetscFunctionReturn(0); 1620 } 1621 1622 #undef __FUNCT__ 1623 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 1624 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1625 { 1626 PCBDDCBenignMatMult_ctx ctx; 1627 PetscErrorCode ierr; 1628 PetscBool apply_right,apply_left,reset_x; 1629 1630 PetscFunctionBegin; 1631 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1632 if (transpose) { 1633 apply_right = ctx->apply_left; 1634 apply_left = ctx->apply_right; 1635 } else { 1636 apply_right = ctx->apply_right; 1637 apply_left = ctx->apply_left; 1638 } 1639 reset_x = PETSC_FALSE; 1640 if (apply_right) { 1641 const PetscScalar *ax; 1642 PetscInt nl,i; 1643 1644 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1645 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1646 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1647 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1648 for (i=0;i<ctx->benign_n;i++) { 1649 PetscScalar sum,val; 1650 const PetscInt *idxs; 1651 PetscInt nz,j; 1652 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1653 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1654 sum = 0.; 1655 if (ctx->apply_p0) { 1656 val = ctx->work[idxs[nz-1]]; 1657 for (j=0;j<nz-1;j++) { 1658 sum += ctx->work[idxs[j]]; 1659 ctx->work[idxs[j]] += val; 1660 } 1661 } else { 1662 for (j=0;j<nz-1;j++) { 1663 sum += ctx->work[idxs[j]]; 1664 } 1665 } 1666 ctx->work[idxs[nz-1]] -= sum; 1667 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1668 } 1669 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1670 reset_x = PETSC_TRUE; 1671 } 1672 if (transpose) { 1673 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1674 } else { 1675 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1676 } 1677 if (reset_x) { 1678 ierr = VecResetArray(x);CHKERRQ(ierr); 1679 } 1680 if (apply_left) { 1681 PetscScalar *ay; 1682 PetscInt i; 1683 1684 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1685 for (i=0;i<ctx->benign_n;i++) { 1686 PetscScalar sum,val; 1687 const PetscInt *idxs; 1688 PetscInt nz,j; 1689 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1690 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1691 val = -ay[idxs[nz-1]]; 1692 if (ctx->apply_p0) { 1693 sum = 0.; 1694 for (j=0;j<nz-1;j++) { 1695 sum += ay[idxs[j]]; 1696 ay[idxs[j]] += val; 1697 } 1698 ay[idxs[nz-1]] += sum; 1699 } else { 1700 for (j=0;j<nz-1;j++) { 1701 ay[idxs[j]] += val; 1702 } 1703 ay[idxs[nz-1]] = 0.; 1704 } 1705 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1706 } 1707 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1708 } 1709 PetscFunctionReturn(0); 1710 } 1711 1712 #undef __FUNCT__ 1713 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 1714 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1715 { 1716 PetscErrorCode ierr; 1717 1718 PetscFunctionBegin; 1719 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1720 PetscFunctionReturn(0); 1721 } 1722 1723 #undef __FUNCT__ 1724 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 1725 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1726 { 1727 PetscErrorCode ierr; 1728 1729 PetscFunctionBegin; 1730 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1731 PetscFunctionReturn(0); 1732 } 1733 1734 #undef __FUNCT__ 1735 #define __FUNCT__ "PCBDDCBenignShellMat" 1736 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1737 { 1738 PC_IS *pcis = (PC_IS*)pc->data; 1739 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1740 PCBDDCBenignMatMult_ctx ctx; 1741 PetscErrorCode ierr; 1742 1743 PetscFunctionBegin; 1744 if (!restore) { 1745 Mat A_IB,A_BI; 1746 PetscScalar *work; 1747 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1748 1749 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1750 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1751 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1752 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1753 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1754 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1755 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1756 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1757 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1758 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1759 ctx->apply_left = PETSC_TRUE; 1760 ctx->apply_right = PETSC_FALSE; 1761 ctx->apply_p0 = PETSC_FALSE; 1762 ctx->benign_n = pcbddc->benign_n; 1763 if (reuse) { 1764 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1765 ctx->free = PETSC_FALSE; 1766 } else { /* TODO: could be optimized for successive solves */ 1767 ISLocalToGlobalMapping N_to_D; 1768 PetscInt i; 1769 1770 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1771 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1772 for (i=0;i<pcbddc->benign_n;i++) { 1773 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1774 } 1775 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1776 ctx->free = PETSC_TRUE; 1777 } 1778 ctx->A = pcis->A_IB; 1779 ctx->work = work; 1780 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1781 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1782 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1783 pcis->A_IB = A_IB; 1784 1785 /* A_BI as A_IB^T */ 1786 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1787 pcbddc->benign_original_mat = pcis->A_BI; 1788 pcis->A_BI = A_BI; 1789 } else { 1790 if (!pcbddc->benign_original_mat) { 1791 PetscFunctionReturn(0); 1792 } 1793 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1794 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1795 pcis->A_IB = ctx->A; 1796 ctx->A = NULL; 1797 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1798 pcis->A_BI = pcbddc->benign_original_mat; 1799 pcbddc->benign_original_mat = NULL; 1800 if (ctx->free) { 1801 PetscInt i; 1802 for (i=0;i<ctx->benign_n;i++) { 1803 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1804 } 1805 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1806 } 1807 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1808 ierr = PetscFree(ctx);CHKERRQ(ierr); 1809 } 1810 PetscFunctionReturn(0); 1811 } 1812 1813 /* used just in bddc debug mode */ 1814 #undef __FUNCT__ 1815 #define __FUNCT__ "PCBDDCBenignProject" 1816 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1817 { 1818 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1819 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1820 Mat An; 1821 PetscErrorCode ierr; 1822 1823 PetscFunctionBegin; 1824 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1825 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1826 if (is1) { 1827 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1828 ierr = MatDestroy(&An);CHKERRQ(ierr); 1829 } else { 1830 *B = An; 1831 } 1832 PetscFunctionReturn(0); 1833 } 1834 1835 /* TODO: add reuse flag */ 1836 #undef __FUNCT__ 1837 #define __FUNCT__ "MatSeqAIJCompress" 1838 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1839 { 1840 Mat Bt; 1841 PetscScalar *a,*bdata; 1842 const PetscInt *ii,*ij; 1843 PetscInt m,n,i,nnz,*bii,*bij; 1844 PetscBool flg_row; 1845 PetscErrorCode ierr; 1846 1847 PetscFunctionBegin; 1848 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 1849 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1850 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 1851 nnz = n; 1852 for (i=0;i<ii[n];i++) { 1853 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 1854 } 1855 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 1856 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 1857 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 1858 nnz = 0; 1859 bii[0] = 0; 1860 for (i=0;i<n;i++) { 1861 PetscInt j; 1862 for (j=ii[i];j<ii[i+1];j++) { 1863 PetscScalar entry = a[j]; 1864 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 1865 bij[nnz] = ij[j]; 1866 bdata[nnz] = entry; 1867 nnz++; 1868 } 1869 } 1870 bii[i+1] = nnz; 1871 } 1872 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 1873 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 1874 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1875 { 1876 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 1877 b->free_a = PETSC_TRUE; 1878 b->free_ij = PETSC_TRUE; 1879 } 1880 *B = Bt; 1881 PetscFunctionReturn(0); 1882 } 1883 1884 #undef __FUNCT__ 1885 #define __FUNCT__ "MatDetectDisconnectedComponents" 1886 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 1887 { 1888 Mat B; 1889 IS is_dummy,*cc_n; 1890 ISLocalToGlobalMapping l2gmap_dummy; 1891 PCBDDCGraph graph; 1892 PetscInt i,n; 1893 PetscInt *xadj,*adjncy; 1894 PetscInt *xadj_filtered,*adjncy_filtered; 1895 PetscBool flg_row,isseqaij; 1896 PetscErrorCode ierr; 1897 1898 PetscFunctionBegin; 1899 if (!A->rmap->N || !A->cmap->N) { 1900 *ncc = 0; 1901 *cc = NULL; 1902 PetscFunctionReturn(0); 1903 } 1904 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1905 if (!isseqaij && filter) { 1906 PetscBool isseqdense; 1907 1908 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 1909 if (!isseqdense) { 1910 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1911 } else { /* TODO: rectangular case and LDA */ 1912 PetscScalar *array; 1913 PetscReal chop=1.e-6; 1914 1915 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1916 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 1917 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 1918 for (i=0;i<n;i++) { 1919 PetscInt j; 1920 for (j=i+1;j<n;j++) { 1921 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 1922 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 1923 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 1924 } 1925 } 1926 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 1927 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1928 } 1929 } else { 1930 B = A; 1931 } 1932 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1933 1934 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 1935 if (filter) { 1936 PetscScalar *data; 1937 PetscInt j,cum; 1938 1939 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 1940 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 1941 cum = 0; 1942 for (i=0;i<n;i++) { 1943 PetscInt t; 1944 1945 for (j=xadj[i];j<xadj[i+1];j++) { 1946 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 1947 continue; 1948 } 1949 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 1950 } 1951 t = xadj_filtered[i]; 1952 xadj_filtered[i] = cum; 1953 cum += t; 1954 } 1955 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 1956 } else { 1957 xadj_filtered = NULL; 1958 adjncy_filtered = NULL; 1959 } 1960 1961 /* compute local connected components using PCBDDCGraph */ 1962 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 1963 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 1964 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1965 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 1966 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 1967 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 1968 if (xadj_filtered) { 1969 graph->xadj = xadj_filtered; 1970 graph->adjncy = adjncy_filtered; 1971 } else { 1972 graph->xadj = xadj; 1973 graph->adjncy = adjncy; 1974 } 1975 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 1976 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 1977 /* partial clean up */ 1978 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 1979 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1980 if (A != B) { 1981 ierr = MatDestroy(&B);CHKERRQ(ierr); 1982 } 1983 1984 /* get back data */ 1985 if (ncc) *ncc = graph->ncc; 1986 if (cc) { 1987 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 1988 for (i=0;i<graph->ncc;i++) { 1989 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 1990 } 1991 *cc = cc_n; 1992 } 1993 /* clean up graph */ 1994 graph->xadj = 0; 1995 graph->adjncy = 0; 1996 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 1997 PetscFunctionReturn(0); 1998 } 1999 2000 #undef __FUNCT__ 2001 #define __FUNCT__ "PCBDDCBenignCheck" 2002 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2003 { 2004 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2005 PC_IS* pcis = (PC_IS*)(pc->data); 2006 IS dirIS = NULL; 2007 PetscInt i; 2008 PetscErrorCode ierr; 2009 2010 PetscFunctionBegin; 2011 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2012 if (zerodiag) { 2013 Mat A; 2014 Vec vec3_N; 2015 PetscScalar *vals; 2016 const PetscInt *idxs; 2017 PetscInt nz,*count; 2018 2019 /* p0 */ 2020 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2021 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2022 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2023 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2024 for (i=0;i<nz;i++) vals[i] = 1.; 2025 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2026 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2027 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2028 /* v_I */ 2029 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2030 for (i=0;i<nz;i++) vals[i] = 0.; 2031 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2032 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2033 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2034 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2035 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2036 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2037 if (dirIS) { 2038 PetscInt n; 2039 2040 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2041 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2042 for (i=0;i<n;i++) vals[i] = 0.; 2043 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2044 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2045 } 2046 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2047 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2048 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2049 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2050 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2051 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2052 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2053 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2054 ierr = PetscFree(vals);CHKERRQ(ierr); 2055 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2056 2057 /* there should not be any pressure dofs lying on the interface */ 2058 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2059 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2060 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2061 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2062 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2063 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2064 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2065 ierr = PetscFree(count);CHKERRQ(ierr); 2066 } 2067 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2068 2069 /* check PCBDDCBenignGetOrSetP0 */ 2070 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2071 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2072 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2073 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2074 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2075 for (i=0;i<pcbddc->benign_n;i++) { 2076 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2077 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 2078 } 2079 PetscFunctionReturn(0); 2080 } 2081 2082 #undef __FUNCT__ 2083 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 2084 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2085 { 2086 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2087 IS pressures,zerodiag,*zerodiag_subs; 2088 PetscInt nz,n; 2089 PetscInt *interior_dofs,n_interior_dofs; 2090 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 2091 PetscErrorCode ierr; 2092 2093 PetscFunctionBegin; 2094 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2095 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2096 for (n=0;n<pcbddc->benign_n;n++) { 2097 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2098 } 2099 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2100 pcbddc->benign_n = 0; 2101 /* if a local info on dofs is present, assumes that the last field represents "pressures" 2102 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2103 Checks if all the pressure dofs in each subdomain have a zero diagonal 2104 If not, a change of basis on pressures is not needed 2105 since the local Schur complements are already SPD 2106 */ 2107 has_null_pressures = PETSC_TRUE; 2108 have_null = PETSC_TRUE; 2109 if (pcbddc->n_ISForDofsLocal) { 2110 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2111 2112 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2113 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2114 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2115 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2116 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2117 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2118 if (!sorted) { 2119 ierr = ISSort(pressures);CHKERRQ(ierr); 2120 } 2121 } else { 2122 pressures = NULL; 2123 } 2124 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2125 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2126 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2127 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2128 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2129 if (!sorted) { 2130 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2131 } 2132 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2133 if (!nz) { 2134 if (n) have_null = PETSC_FALSE; 2135 has_null_pressures = PETSC_FALSE; 2136 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2137 } 2138 recompute_zerodiag = PETSC_FALSE; 2139 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2140 zerodiag_subs = NULL; 2141 pcbddc->benign_n = 0; 2142 n_interior_dofs = 0; 2143 interior_dofs = NULL; 2144 if (pcbddc->current_level) { /* need to compute interior nodes */ 2145 PetscInt n,i,j; 2146 PetscInt n_neigh,*neigh,*n_shared,**shared; 2147 PetscInt *iwork; 2148 2149 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2150 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2151 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2152 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2153 for (i=1;i<n_neigh;i++) 2154 for (j=0;j<n_shared[i];j++) 2155 iwork[shared[i][j]] += 1; 2156 for (i=0;i<n;i++) 2157 if (!iwork[i]) 2158 interior_dofs[n_interior_dofs++] = i; 2159 ierr = PetscFree(iwork);CHKERRQ(ierr); 2160 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2161 } 2162 if (has_null_pressures) { 2163 IS *subs; 2164 PetscInt nsubs,i,j,nl; 2165 const PetscInt *idxs; 2166 PetscScalar *array; 2167 Vec *work; 2168 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2169 2170 subs = pcbddc->local_subs; 2171 nsubs = pcbddc->n_local_subs; 2172 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2173 if (pcbddc->current_level) { 2174 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2175 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2176 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2177 /* work[0] = 1_p */ 2178 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2179 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2180 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2181 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2182 /* work[0] = 1_v */ 2183 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2184 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2185 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2186 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2187 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2188 } 2189 if (nsubs > 1) { 2190 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2191 for (i=0;i<nsubs;i++) { 2192 ISLocalToGlobalMapping l2g; 2193 IS t_zerodiag_subs; 2194 PetscInt nl; 2195 2196 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2197 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2198 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2199 if (nl) { 2200 PetscBool valid = PETSC_TRUE; 2201 2202 if (pcbddc->current_level) { 2203 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2204 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2205 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2206 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2207 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2208 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2209 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2210 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2211 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2212 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2213 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2214 for (j=0;j<n_interior_dofs;j++) { 2215 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2216 valid = PETSC_FALSE; 2217 break; 2218 } 2219 } 2220 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2221 } 2222 if (valid && pcbddc->NeumannBoundariesLocal) { 2223 IS t_bc; 2224 PetscInt nzb; 2225 2226 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 2227 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 2228 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 2229 if (nzb) valid = PETSC_FALSE; 2230 } 2231 if (valid && pressures) { 2232 IS t_pressure_subs; 2233 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2234 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2235 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2236 } 2237 if (valid) { 2238 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2239 pcbddc->benign_n++; 2240 } else { 2241 recompute_zerodiag = PETSC_TRUE; 2242 } 2243 } 2244 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2245 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2246 } 2247 } else { /* there's just one subdomain (or zero if they have not been detected */ 2248 PetscBool valid = PETSC_TRUE; 2249 2250 if (pcbddc->NeumannBoundariesLocal) { 2251 PetscInt nzb; 2252 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 2253 if (nzb) valid = PETSC_FALSE; 2254 } 2255 if (valid && pressures) { 2256 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2257 } 2258 if (valid && pcbddc->current_level) { 2259 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2260 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2261 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2262 for (j=0;j<n_interior_dofs;j++) { 2263 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2264 valid = PETSC_FALSE; 2265 break; 2266 } 2267 } 2268 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2269 } 2270 if (valid) { 2271 pcbddc->benign_n = 1; 2272 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2273 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2274 zerodiag_subs[0] = zerodiag; 2275 } 2276 } 2277 if (pcbddc->current_level) { 2278 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2279 } 2280 } 2281 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2282 2283 if (!pcbddc->benign_n) { 2284 PetscInt n; 2285 2286 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2287 recompute_zerodiag = PETSC_FALSE; 2288 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2289 if (n) { 2290 has_null_pressures = PETSC_FALSE; 2291 have_null = PETSC_FALSE; 2292 } 2293 } 2294 2295 /* final check for null pressures */ 2296 if (zerodiag && pressures) { 2297 PetscInt nz,np; 2298 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2299 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2300 if (nz != np) have_null = PETSC_FALSE; 2301 } 2302 2303 if (recompute_zerodiag) { 2304 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2305 if (pcbddc->benign_n == 1) { 2306 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2307 zerodiag = zerodiag_subs[0]; 2308 } else { 2309 PetscInt i,nzn,*new_idxs; 2310 2311 nzn = 0; 2312 for (i=0;i<pcbddc->benign_n;i++) { 2313 PetscInt ns; 2314 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2315 nzn += ns; 2316 } 2317 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2318 nzn = 0; 2319 for (i=0;i<pcbddc->benign_n;i++) { 2320 PetscInt ns,*idxs; 2321 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2322 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2323 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2324 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2325 nzn += ns; 2326 } 2327 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2328 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2329 } 2330 have_null = PETSC_FALSE; 2331 } 2332 2333 /* Prepare matrix to compute no-net-flux */ 2334 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2335 Mat A,loc_divudotp; 2336 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2337 IS row,col,isused = NULL; 2338 PetscInt M,N,n,st,n_isused; 2339 2340 if (pressures) { 2341 isused = pressures; 2342 } else { 2343 isused = zerodiag; 2344 } 2345 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2346 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2347 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2348 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2349 n_isused = 0; 2350 if (isused) { 2351 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2352 } 2353 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2354 st = st-n_isused; 2355 if (n) { 2356 const PetscInt *gidxs; 2357 2358 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2359 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2360 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2361 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2362 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2363 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2364 } else { 2365 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2366 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2367 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2368 } 2369 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2370 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2371 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2372 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2373 ierr = ISDestroy(&row);CHKERRQ(ierr); 2374 ierr = ISDestroy(&col);CHKERRQ(ierr); 2375 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2376 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2377 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2378 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2379 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2380 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2381 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2382 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2383 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2384 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2385 } 2386 2387 /* change of basis and p0 dofs */ 2388 if (has_null_pressures) { 2389 IS zerodiagc; 2390 const PetscInt *idxs,*idxsc; 2391 PetscInt i,s,*nnz; 2392 2393 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2394 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2395 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2396 /* local change of basis for pressures */ 2397 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2398 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2399 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2400 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2401 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2402 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2403 for (i=0;i<pcbddc->benign_n;i++) { 2404 PetscInt nzs,j; 2405 2406 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2407 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2408 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2409 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2410 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2411 } 2412 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2413 ierr = PetscFree(nnz);CHKERRQ(ierr); 2414 /* set identity on velocities */ 2415 for (i=0;i<n-nz;i++) { 2416 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2417 } 2418 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2419 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2420 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2421 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2422 /* set change on pressures */ 2423 for (s=0;s<pcbddc->benign_n;s++) { 2424 PetscScalar *array; 2425 PetscInt nzs; 2426 2427 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2428 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2429 for (i=0;i<nzs-1;i++) { 2430 PetscScalar vals[2]; 2431 PetscInt cols[2]; 2432 2433 cols[0] = idxs[i]; 2434 cols[1] = idxs[nzs-1]; 2435 vals[0] = 1.; 2436 vals[1] = 1.; 2437 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2438 } 2439 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2440 for (i=0;i<nzs-1;i++) array[i] = -1.; 2441 array[nzs-1] = 1.; 2442 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2443 /* store local idxs for p0 */ 2444 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2445 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2446 ierr = PetscFree(array);CHKERRQ(ierr); 2447 } 2448 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2449 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2450 /* project if needed */ 2451 if (pcbddc->benign_change_explicit) { 2452 Mat M; 2453 2454 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2455 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2456 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2457 ierr = MatDestroy(&M);CHKERRQ(ierr); 2458 } 2459 /* store global idxs for p0 */ 2460 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2461 } 2462 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2463 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2464 2465 /* determines if the coarse solver will be singular or not */ 2466 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2467 /* determines if the problem has subdomains with 0 pressure block */ 2468 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2469 *zerodiaglocal = zerodiag; 2470 PetscFunctionReturn(0); 2471 } 2472 2473 #undef __FUNCT__ 2474 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 2475 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2476 { 2477 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2478 PetscScalar *array; 2479 PetscErrorCode ierr; 2480 2481 PetscFunctionBegin; 2482 if (!pcbddc->benign_sf) { 2483 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2484 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2485 } 2486 if (get) { 2487 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2488 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2489 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2490 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2491 } else { 2492 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2493 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2494 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2495 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2496 } 2497 PetscFunctionReturn(0); 2498 } 2499 2500 #undef __FUNCT__ 2501 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 2502 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2503 { 2504 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2505 PetscErrorCode ierr; 2506 2507 PetscFunctionBegin; 2508 /* TODO: add error checking 2509 - avoid nested pop (or push) calls. 2510 - cannot push before pop. 2511 - cannot call this if pcbddc->local_mat is NULL 2512 */ 2513 if (!pcbddc->benign_n) { 2514 PetscFunctionReturn(0); 2515 } 2516 if (pop) { 2517 if (pcbddc->benign_change_explicit) { 2518 IS is_p0; 2519 MatReuse reuse; 2520 2521 /* extract B_0 */ 2522 reuse = MAT_INITIAL_MATRIX; 2523 if (pcbddc->benign_B0) { 2524 reuse = MAT_REUSE_MATRIX; 2525 } 2526 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2527 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2528 /* remove rows and cols from local problem */ 2529 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2530 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2531 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2532 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2533 } else { 2534 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2535 PetscScalar *vals; 2536 PetscInt i,n,*idxs_ins; 2537 2538 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2539 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2540 if (!pcbddc->benign_B0) { 2541 PetscInt *nnz; 2542 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2543 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2544 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2545 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2546 for (i=0;i<pcbddc->benign_n;i++) { 2547 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2548 nnz[i] = n - nnz[i]; 2549 } 2550 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2551 ierr = PetscFree(nnz);CHKERRQ(ierr); 2552 } 2553 2554 for (i=0;i<pcbddc->benign_n;i++) { 2555 PetscScalar *array; 2556 PetscInt *idxs,j,nz,cum; 2557 2558 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2559 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2560 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2561 for (j=0;j<nz;j++) vals[j] = 1.; 2562 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2563 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2564 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2565 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2566 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2567 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2568 cum = 0; 2569 for (j=0;j<n;j++) { 2570 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2571 vals[cum] = array[j]; 2572 idxs_ins[cum] = j; 2573 cum++; 2574 } 2575 } 2576 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2577 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2578 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2579 } 2580 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2581 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2582 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2583 } 2584 } else { /* push */ 2585 if (pcbddc->benign_change_explicit) { 2586 PetscInt i; 2587 2588 for (i=0;i<pcbddc->benign_n;i++) { 2589 PetscScalar *B0_vals; 2590 PetscInt *B0_cols,B0_ncol; 2591 2592 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2593 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2594 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2595 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2596 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2597 } 2598 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2599 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2600 } else { 2601 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2602 } 2603 } 2604 PetscFunctionReturn(0); 2605 } 2606 2607 #undef __FUNCT__ 2608 #define __FUNCT__ "PCBDDCAdaptiveSelection" 2609 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2610 { 2611 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2612 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2613 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2614 PetscBLASInt *B_iwork,*B_ifail; 2615 PetscScalar *work,lwork; 2616 PetscScalar *St,*S,*eigv; 2617 PetscScalar *Sarray,*Starray; 2618 PetscReal *eigs,thresh; 2619 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2620 PetscBool allocated_S_St; 2621 #if defined(PETSC_USE_COMPLEX) 2622 PetscReal *rwork; 2623 #endif 2624 PetscErrorCode ierr; 2625 2626 PetscFunctionBegin; 2627 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2628 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2629 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2630 2631 if (pcbddc->dbg_flag) { 2632 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2633 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2634 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2635 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2636 } 2637 2638 if (pcbddc->dbg_flag) { 2639 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2640 } 2641 2642 /* max size of subsets */ 2643 mss = 0; 2644 for (i=0;i<sub_schurs->n_subs;i++) { 2645 PetscInt subset_size; 2646 2647 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2648 mss = PetscMax(mss,subset_size); 2649 } 2650 2651 /* min/max and threshold */ 2652 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2653 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2654 nmax = PetscMax(nmin,nmax); 2655 allocated_S_St = PETSC_FALSE; 2656 if (nmin) { 2657 allocated_S_St = PETSC_TRUE; 2658 } 2659 2660 /* allocate lapack workspace */ 2661 cum = cum2 = 0; 2662 maxneigs = 0; 2663 for (i=0;i<sub_schurs->n_subs;i++) { 2664 PetscInt n,subset_size; 2665 2666 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2667 n = PetscMin(subset_size,nmax); 2668 cum += subset_size; 2669 cum2 += subset_size*n; 2670 maxneigs = PetscMax(maxneigs,n); 2671 } 2672 if (mss) { 2673 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2674 PetscBLASInt B_itype = 1; 2675 PetscBLASInt B_N = mss; 2676 PetscReal zero = 0.0; 2677 PetscReal eps = 0.0; /* dlamch? */ 2678 2679 B_lwork = -1; 2680 S = NULL; 2681 St = NULL; 2682 eigs = NULL; 2683 eigv = NULL; 2684 B_iwork = NULL; 2685 B_ifail = NULL; 2686 #if defined(PETSC_USE_COMPLEX) 2687 rwork = NULL; 2688 #endif 2689 thresh = 1.0; 2690 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2691 #if defined(PETSC_USE_COMPLEX) 2692 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2693 #else 2694 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2695 #endif 2696 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2697 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2698 } else { 2699 /* TODO */ 2700 } 2701 } else { 2702 lwork = 0; 2703 } 2704 2705 nv = 0; 2706 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2707 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2708 } 2709 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2710 if (allocated_S_St) { 2711 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2712 } 2713 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2714 #if defined(PETSC_USE_COMPLEX) 2715 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2716 #endif 2717 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2718 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2719 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2720 nv+cum,&pcbddc->adaptive_constraints_idxs, 2721 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2722 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2723 2724 maxneigs = 0; 2725 cum = cumarray = 0; 2726 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2727 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2728 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2729 const PetscInt *idxs; 2730 2731 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2732 for (cum=0;cum<nv;cum++) { 2733 pcbddc->adaptive_constraints_n[cum] = 1; 2734 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2735 pcbddc->adaptive_constraints_data[cum] = 1.0; 2736 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2737 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2738 } 2739 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2740 } 2741 2742 if (mss) { /* multilevel */ 2743 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2744 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2745 } 2746 2747 thresh = pcbddc->adaptive_threshold; 2748 for (i=0;i<sub_schurs->n_subs;i++) { 2749 const PetscInt *idxs; 2750 PetscReal upper,lower; 2751 PetscInt j,subset_size,eigs_start = 0; 2752 PetscBLASInt B_N; 2753 PetscBool same_data = PETSC_FALSE; 2754 2755 if (pcbddc->use_deluxe_scaling) { 2756 upper = PETSC_MAX_REAL; 2757 lower = thresh; 2758 } else { 2759 upper = 1./thresh; 2760 lower = 0.; 2761 } 2762 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2763 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2764 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2765 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2766 if (sub_schurs->is_hermitian) { 2767 PetscInt j,k; 2768 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2769 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2770 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2771 } 2772 for (j=0;j<subset_size;j++) { 2773 for (k=j;k<subset_size;k++) { 2774 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2775 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2776 } 2777 } 2778 } else { 2779 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2780 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2781 } 2782 } else { 2783 S = Sarray + cumarray; 2784 St = Starray + cumarray; 2785 } 2786 /* see if we can save some work */ 2787 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2788 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2789 } 2790 2791 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2792 B_neigs = 0; 2793 } else { 2794 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2795 PetscBLASInt B_itype = 1; 2796 PetscBLASInt B_IL, B_IU; 2797 PetscReal eps = -1.0; /* dlamch? */ 2798 PetscInt nmin_s; 2799 PetscBool compute_range = PETSC_FALSE; 2800 2801 if (pcbddc->dbg_flag) { 2802 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 2803 } 2804 2805 compute_range = PETSC_FALSE; 2806 if (thresh > 1.+PETSC_SMALL && !same_data) { 2807 compute_range = PETSC_TRUE; 2808 } 2809 2810 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2811 if (compute_range) { 2812 2813 /* ask for eigenvalues larger than thresh */ 2814 #if defined(PETSC_USE_COMPLEX) 2815 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2816 #else 2817 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2818 #endif 2819 } else if (!same_data) { 2820 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2821 B_IL = 1; 2822 #if defined(PETSC_USE_COMPLEX) 2823 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2824 #else 2825 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2826 #endif 2827 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 2828 PetscInt k; 2829 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2830 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2831 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2832 nmin = nmax; 2833 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2834 for (k=0;k<nmax;k++) { 2835 eigs[k] = 1./PETSC_SMALL; 2836 eigv[k*(subset_size+1)] = 1.0; 2837 } 2838 } 2839 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2840 if (B_ierr) { 2841 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2842 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2843 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2844 } 2845 2846 if (B_neigs > nmax) { 2847 if (pcbddc->dbg_flag) { 2848 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 2849 } 2850 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 2851 B_neigs = nmax; 2852 } 2853 2854 nmin_s = PetscMin(nmin,B_N); 2855 if (B_neigs < nmin_s) { 2856 PetscBLASInt B_neigs2; 2857 2858 if (pcbddc->use_deluxe_scaling) { 2859 B_IL = B_N - nmin_s + 1; 2860 B_IU = B_N - B_neigs; 2861 } else { 2862 B_IL = B_neigs + 1; 2863 B_IU = nmin_s; 2864 } 2865 if (pcbddc->dbg_flag) { 2866 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 2867 } 2868 if (sub_schurs->is_hermitian) { 2869 PetscInt j,k; 2870 for (j=0;j<subset_size;j++) { 2871 for (k=j;k<subset_size;k++) { 2872 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2873 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2874 } 2875 } 2876 } else { 2877 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2878 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2879 } 2880 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2881 #if defined(PETSC_USE_COMPLEX) 2882 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2883 #else 2884 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2885 #endif 2886 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2887 B_neigs += B_neigs2; 2888 } 2889 if (B_ierr) { 2890 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2891 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2892 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2893 } 2894 if (pcbddc->dbg_flag) { 2895 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 2896 for (j=0;j<B_neigs;j++) { 2897 if (eigs[j] == 0.0) { 2898 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 2899 } else { 2900 if (pcbddc->use_deluxe_scaling) { 2901 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 2902 } else { 2903 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 2904 } 2905 } 2906 } 2907 } 2908 } else { 2909 /* TODO */ 2910 } 2911 } 2912 /* change the basis back to the original one */ 2913 if (sub_schurs->change) { 2914 Mat change,phi,phit; 2915 2916 if (pcbddc->dbg_flag > 1) { 2917 PetscInt ii; 2918 for (ii=0;ii<B_neigs;ii++) { 2919 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2920 for (j=0;j<B_N;j++) { 2921 #if defined(PETSC_USE_COMPLEX) 2922 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 2923 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 2924 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2925 #else 2926 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 2927 #endif 2928 } 2929 } 2930 } 2931 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 2932 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 2933 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 2934 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2935 ierr = MatDestroy(&phit);CHKERRQ(ierr); 2936 ierr = MatDestroy(&phi);CHKERRQ(ierr); 2937 } 2938 maxneigs = PetscMax(B_neigs,maxneigs); 2939 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 2940 if (B_neigs) { 2941 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2942 2943 if (pcbddc->dbg_flag > 1) { 2944 PetscInt ii; 2945 for (ii=0;ii<B_neigs;ii++) { 2946 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2947 for (j=0;j<B_N;j++) { 2948 #if defined(PETSC_USE_COMPLEX) 2949 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2950 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2951 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2952 #else 2953 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 2954 #endif 2955 } 2956 } 2957 } 2958 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 2959 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 2960 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 2961 cum++; 2962 } 2963 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2964 /* shift for next computation */ 2965 cumarray += subset_size*subset_size; 2966 } 2967 if (pcbddc->dbg_flag) { 2968 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2969 } 2970 2971 if (mss) { 2972 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2973 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2974 /* destroy matrices (junk) */ 2975 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 2976 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 2977 } 2978 if (allocated_S_St) { 2979 ierr = PetscFree2(S,St);CHKERRQ(ierr); 2980 } 2981 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 2982 #if defined(PETSC_USE_COMPLEX) 2983 ierr = PetscFree(rwork);CHKERRQ(ierr); 2984 #endif 2985 if (pcbddc->dbg_flag) { 2986 PetscInt maxneigs_r; 2987 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2988 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 2989 } 2990 PetscFunctionReturn(0); 2991 } 2992 2993 #undef __FUNCT__ 2994 #define __FUNCT__ "PCBDDCSetUpSolvers" 2995 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 2996 { 2997 PetscScalar *coarse_submat_vals; 2998 PetscErrorCode ierr; 2999 3000 PetscFunctionBegin; 3001 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3002 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3003 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3004 3005 /* Setup local neumann solver ksp_R */ 3006 /* PCBDDCSetUpLocalScatters should be called first! */ 3007 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3008 3009 /* 3010 Setup local correction and local part of coarse basis. 3011 Gives back the dense local part of the coarse matrix in column major ordering 3012 */ 3013 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3014 3015 /* Compute total number of coarse nodes and setup coarse solver */ 3016 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3017 3018 /* free */ 3019 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3020 PetscFunctionReturn(0); 3021 } 3022 3023 #undef __FUNCT__ 3024 #define __FUNCT__ "PCBDDCResetCustomization" 3025 PetscErrorCode PCBDDCResetCustomization(PC pc) 3026 { 3027 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3028 PetscErrorCode ierr; 3029 3030 PetscFunctionBegin; 3031 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3032 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3033 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3034 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3035 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3036 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3037 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3038 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3039 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3040 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3041 PetscFunctionReturn(0); 3042 } 3043 3044 #undef __FUNCT__ 3045 #define __FUNCT__ "PCBDDCResetTopography" 3046 PetscErrorCode PCBDDCResetTopography(PC pc) 3047 { 3048 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3049 PetscInt i; 3050 PetscErrorCode ierr; 3051 3052 PetscFunctionBegin; 3053 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3054 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3055 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3056 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3057 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3058 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3059 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3060 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3061 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3062 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3063 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3064 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3065 for (i=0;i<pcbddc->n_local_subs;i++) { 3066 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3067 } 3068 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3069 if (pcbddc->sub_schurs) { 3070 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 3071 } 3072 pcbddc->graphanalyzed = PETSC_FALSE; 3073 pcbddc->recompute_topography = PETSC_TRUE; 3074 PetscFunctionReturn(0); 3075 } 3076 3077 #undef __FUNCT__ 3078 #define __FUNCT__ "PCBDDCResetSolvers" 3079 PetscErrorCode PCBDDCResetSolvers(PC pc) 3080 { 3081 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3082 PetscErrorCode ierr; 3083 3084 PetscFunctionBegin; 3085 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3086 if (pcbddc->coarse_phi_B) { 3087 PetscScalar *array; 3088 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3089 ierr = PetscFree(array);CHKERRQ(ierr); 3090 } 3091 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3092 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3093 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3094 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3095 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3096 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3097 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3098 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3099 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3100 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3101 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3102 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3103 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3104 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3105 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 3106 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 3107 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 3108 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3109 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3110 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3111 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3112 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3113 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3114 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3115 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3116 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3117 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3118 if (pcbddc->benign_zerodiag_subs) { 3119 PetscInt i; 3120 for (i=0;i<pcbddc->benign_n;i++) { 3121 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3122 } 3123 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3124 } 3125 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3126 PetscFunctionReturn(0); 3127 } 3128 3129 #undef __FUNCT__ 3130 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 3131 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3132 { 3133 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3134 PC_IS *pcis = (PC_IS*)pc->data; 3135 VecType impVecType; 3136 PetscInt n_constraints,n_R,old_size; 3137 PetscErrorCode ierr; 3138 3139 PetscFunctionBegin; 3140 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 3141 /* get sizes */ 3142 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3143 n_R = pcis->n - pcbddc->n_vertices; 3144 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3145 /* local work vectors (try to avoid unneeded work)*/ 3146 /* R nodes */ 3147 old_size = -1; 3148 if (pcbddc->vec1_R) { 3149 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3150 } 3151 if (n_R != old_size) { 3152 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3153 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3154 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3155 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3156 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3157 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3158 } 3159 /* local primal dofs */ 3160 old_size = -1; 3161 if (pcbddc->vec1_P) { 3162 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3163 } 3164 if (pcbddc->local_primal_size != old_size) { 3165 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3166 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3167 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3168 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3169 } 3170 /* local explicit constraints */ 3171 old_size = -1; 3172 if (pcbddc->vec1_C) { 3173 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3174 } 3175 if (n_constraints && n_constraints != old_size) { 3176 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3177 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3178 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3179 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3180 } 3181 PetscFunctionReturn(0); 3182 } 3183 3184 #undef __FUNCT__ 3185 #define __FUNCT__ "PCBDDCSetUpCorrection" 3186 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3187 { 3188 PetscErrorCode ierr; 3189 /* pointers to pcis and pcbddc */ 3190 PC_IS* pcis = (PC_IS*)pc->data; 3191 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3192 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3193 /* submatrices of local problem */ 3194 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3195 /* submatrices of local coarse problem */ 3196 Mat S_VV,S_CV,S_VC,S_CC; 3197 /* working matrices */ 3198 Mat C_CR; 3199 /* additional working stuff */ 3200 PC pc_R; 3201 Mat F; 3202 Vec dummy_vec; 3203 PetscBool isLU,isCHOL,isILU,need_benign_correction; 3204 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3205 PetscScalar *work; 3206 PetscInt *idx_V_B; 3207 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3208 PetscInt i,n_R,n_D,n_B; 3209 3210 /* some shortcuts to scalars */ 3211 PetscScalar one=1.0,m_one=-1.0; 3212 3213 PetscFunctionBegin; 3214 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3215 3216 /* Set Non-overlapping dimensions */ 3217 n_vertices = pcbddc->n_vertices; 3218 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3219 n_B = pcis->n_B; 3220 n_D = pcis->n - n_B; 3221 n_R = pcis->n - n_vertices; 3222 3223 /* vertices in boundary numbering */ 3224 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3225 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3226 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3227 3228 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3229 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3230 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3231 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3232 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3233 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3234 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3235 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3236 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3237 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3238 3239 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3240 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3241 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3242 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3243 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3244 lda_rhs = n_R; 3245 need_benign_correction = PETSC_FALSE; 3246 if (isLU || isILU || isCHOL) { 3247 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3248 } else if (sub_schurs && sub_schurs->reuse_solver) { 3249 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3250 MatFactorType type; 3251 3252 F = reuse_solver->F; 3253 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3254 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3255 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3256 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3257 } else { 3258 F = NULL; 3259 } 3260 3261 /* allocate workspace */ 3262 n = 0; 3263 if (n_constraints) { 3264 n += lda_rhs*n_constraints; 3265 } 3266 if (n_vertices) { 3267 n = PetscMax(2*lda_rhs*n_vertices,n); 3268 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3269 } 3270 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3271 3272 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3273 dummy_vec = NULL; 3274 if (need_benign_correction && lda_rhs != n_R && F) { 3275 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3276 } 3277 3278 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3279 if (n_constraints) { 3280 Mat M1,M2,M3,C_B; 3281 IS is_aux; 3282 PetscScalar *array,*array2; 3283 3284 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3285 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3286 3287 /* Extract constraints on R nodes: C_{CR} */ 3288 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3289 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3290 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3291 3292 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3293 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3294 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3295 for (i=0;i<n_constraints;i++) { 3296 const PetscScalar *row_cmat_values; 3297 const PetscInt *row_cmat_indices; 3298 PetscInt size_of_constraint,j; 3299 3300 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3301 for (j=0;j<size_of_constraint;j++) { 3302 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3303 } 3304 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3305 } 3306 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3307 if (F) { 3308 Mat B; 3309 3310 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3311 if (need_benign_correction) { 3312 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3313 3314 /* rhs is already zero on interior dofs, no need to change the rhs */ 3315 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3316 } 3317 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 3318 if (need_benign_correction) { 3319 PetscScalar *marr; 3320 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3321 3322 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3323 if (lda_rhs != n_R) { 3324 for (i=0;i<n_constraints;i++) { 3325 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3326 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3327 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3328 } 3329 } else { 3330 for (i=0;i<n_constraints;i++) { 3331 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3332 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3333 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3334 } 3335 } 3336 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3337 } 3338 ierr = MatDestroy(&B);CHKERRQ(ierr); 3339 } else { 3340 PetscScalar *marr; 3341 3342 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3343 for (i=0;i<n_constraints;i++) { 3344 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3345 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3346 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3347 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3348 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3349 } 3350 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3351 } 3352 if (!pcbddc->switch_static) { 3353 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3354 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3355 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3356 for (i=0;i<n_constraints;i++) { 3357 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3358 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3359 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3360 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3361 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3362 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3363 } 3364 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3365 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3366 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3367 } else { 3368 if (lda_rhs != n_R) { 3369 IS dummy; 3370 3371 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3372 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3373 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3374 } else { 3375 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3376 pcbddc->local_auxmat2 = local_auxmat2_R; 3377 } 3378 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3379 } 3380 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3381 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3382 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3383 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3384 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3385 if (isCHOL) { 3386 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3387 } else { 3388 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3389 } 3390 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3391 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3392 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3393 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3394 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3395 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3396 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3397 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3398 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3399 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3400 } 3401 3402 /* Get submatrices from subdomain matrix */ 3403 if (n_vertices) { 3404 IS is_aux; 3405 3406 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3407 IS tis; 3408 3409 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3410 ierr = ISSort(tis);CHKERRQ(ierr); 3411 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3412 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3413 } else { 3414 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3415 } 3416 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3417 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3418 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3419 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3420 } 3421 3422 /* Matrix of coarse basis functions (local) */ 3423 if (pcbddc->coarse_phi_B) { 3424 PetscInt on_B,on_primal,on_D=n_D; 3425 if (pcbddc->coarse_phi_D) { 3426 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3427 } 3428 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3429 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3430 PetscScalar *marray; 3431 3432 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3433 ierr = PetscFree(marray);CHKERRQ(ierr); 3434 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3435 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3436 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3437 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3438 } 3439 } 3440 3441 if (!pcbddc->coarse_phi_B) { 3442 PetscScalar *marray; 3443 3444 n = n_B*pcbddc->local_primal_size; 3445 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3446 n += n_D*pcbddc->local_primal_size; 3447 } 3448 if (!pcbddc->symmetric_primal) { 3449 n *= 2; 3450 } 3451 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 3452 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3453 n = n_B*pcbddc->local_primal_size; 3454 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3455 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3456 n += n_D*pcbddc->local_primal_size; 3457 } 3458 if (!pcbddc->symmetric_primal) { 3459 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3460 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3461 n = n_B*pcbddc->local_primal_size; 3462 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3463 } 3464 } else { 3465 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3466 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3467 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3468 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3469 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3470 } 3471 } 3472 } 3473 3474 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3475 p0_lidx_I = NULL; 3476 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3477 const PetscInt *idxs; 3478 3479 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3480 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3481 for (i=0;i<pcbddc->benign_n;i++) { 3482 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3483 } 3484 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3485 } 3486 3487 /* vertices */ 3488 if (n_vertices) { 3489 3490 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3491 3492 if (n_R) { 3493 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3494 PetscBLASInt B_N,B_one = 1; 3495 PetscScalar *x,*y; 3496 PetscBool isseqaij; 3497 3498 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3499 if (need_benign_correction) { 3500 ISLocalToGlobalMapping RtoN; 3501 IS is_p0; 3502 PetscInt *idxs_p0,n; 3503 3504 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3505 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3506 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3507 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3508 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3509 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3510 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3511 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3512 } 3513 3514 if (lda_rhs == n_R) { 3515 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3516 } else { 3517 PetscScalar *av,*array; 3518 const PetscInt *xadj,*adjncy; 3519 PetscInt n; 3520 PetscBool flg_row; 3521 3522 array = work+lda_rhs*n_vertices; 3523 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3524 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3525 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3526 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3527 for (i=0;i<n;i++) { 3528 PetscInt j; 3529 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3530 } 3531 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3532 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3533 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3534 } 3535 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3536 if (need_benign_correction) { 3537 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3538 PetscScalar *marr; 3539 3540 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3541 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3542 3543 | 0 0 0 | (V) 3544 L = | 0 0 -1 | (P-p0) 3545 | 0 0 -1 | (p0) 3546 3547 */ 3548 for (i=0;i<reuse_solver->benign_n;i++) { 3549 const PetscScalar *vals; 3550 const PetscInt *idxs,*idxs_zero; 3551 PetscInt n,j,nz; 3552 3553 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3554 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3555 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3556 for (j=0;j<n;j++) { 3557 PetscScalar val = vals[j]; 3558 PetscInt k,col = idxs[j]; 3559 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3560 } 3561 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3562 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3563 } 3564 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3565 } 3566 if (F) { 3567 /* need to correct the rhs */ 3568 if (need_benign_correction) { 3569 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3570 PetscScalar *marr; 3571 3572 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3573 if (lda_rhs != n_R) { 3574 for (i=0;i<n_vertices;i++) { 3575 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3576 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3577 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3578 } 3579 } else { 3580 for (i=0;i<n_vertices;i++) { 3581 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3582 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3583 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3584 } 3585 } 3586 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3587 } 3588 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3589 /* need to correct the solution */ 3590 if (need_benign_correction) { 3591 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3592 PetscScalar *marr; 3593 3594 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3595 if (lda_rhs != n_R) { 3596 for (i=0;i<n_vertices;i++) { 3597 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3598 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3599 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3600 } 3601 } else { 3602 for (i=0;i<n_vertices;i++) { 3603 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3604 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3605 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3606 } 3607 } 3608 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3609 } 3610 } else { 3611 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3612 for (i=0;i<n_vertices;i++) { 3613 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3614 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3615 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3616 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3617 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3618 } 3619 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3620 } 3621 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3622 /* S_VV and S_CV */ 3623 if (n_constraints) { 3624 Mat B; 3625 3626 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3627 for (i=0;i<n_vertices;i++) { 3628 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3629 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3630 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3631 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3632 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3633 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3634 } 3635 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3636 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3637 ierr = MatDestroy(&B);CHKERRQ(ierr); 3638 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3639 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3640 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3641 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3642 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3643 ierr = MatDestroy(&B);CHKERRQ(ierr); 3644 } 3645 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3646 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3647 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3648 } 3649 if (lda_rhs != n_R) { 3650 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3651 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3652 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3653 } 3654 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3655 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3656 if (need_benign_correction) { 3657 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3658 PetscScalar *marr,*sums; 3659 3660 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3661 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 3662 for (i=0;i<reuse_solver->benign_n;i++) { 3663 const PetscScalar *vals; 3664 const PetscInt *idxs,*idxs_zero; 3665 PetscInt n,j,nz; 3666 3667 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3668 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3669 for (j=0;j<n_vertices;j++) { 3670 PetscInt k; 3671 sums[j] = 0.; 3672 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3673 } 3674 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3675 for (j=0;j<n;j++) { 3676 PetscScalar val = vals[j]; 3677 PetscInt k; 3678 for (k=0;k<n_vertices;k++) { 3679 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3680 } 3681 } 3682 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3683 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3684 } 3685 ierr = PetscFree(sums);CHKERRQ(ierr); 3686 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 3687 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3688 } 3689 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3690 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3691 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3692 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3693 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3694 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3695 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3696 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3697 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3698 } else { 3699 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3700 } 3701 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3702 3703 /* coarse basis functions */ 3704 for (i=0;i<n_vertices;i++) { 3705 PetscScalar *y; 3706 3707 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3708 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3709 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3710 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3711 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3712 y[n_B*i+idx_V_B[i]] = 1.0; 3713 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3714 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3715 3716 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3717 PetscInt j; 3718 3719 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3720 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3721 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3722 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3723 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3724 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3725 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3726 } 3727 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3728 } 3729 /* if n_R == 0 the object is not destroyed */ 3730 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3731 } 3732 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3733 3734 if (n_constraints) { 3735 Mat B; 3736 3737 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3738 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3739 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3740 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3741 if (n_vertices) { 3742 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3743 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3744 } else { 3745 Mat S_VCt; 3746 3747 if (lda_rhs != n_R) { 3748 ierr = MatDestroy(&B);CHKERRQ(ierr); 3749 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3750 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3751 } 3752 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3753 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3754 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3755 } 3756 } 3757 ierr = MatDestroy(&B);CHKERRQ(ierr); 3758 /* coarse basis functions */ 3759 for (i=0;i<n_constraints;i++) { 3760 PetscScalar *y; 3761 3762 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3763 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3764 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3765 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3766 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3767 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3768 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3769 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3770 PetscInt j; 3771 3772 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3773 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3774 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3775 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3776 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3777 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3778 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3779 } 3780 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3781 } 3782 } 3783 if (n_constraints) { 3784 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3785 } 3786 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3787 3788 /* coarse matrix entries relative to B_0 */ 3789 if (pcbddc->benign_n) { 3790 Mat B0_B,B0_BPHI; 3791 IS is_dummy; 3792 PetscScalar *data; 3793 PetscInt j; 3794 3795 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3796 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3797 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3798 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3799 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3800 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3801 for (j=0;j<pcbddc->benign_n;j++) { 3802 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3803 for (i=0;i<pcbddc->local_primal_size;i++) { 3804 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3805 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3806 } 3807 } 3808 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3809 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3810 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3811 } 3812 3813 /* compute other basis functions for non-symmetric problems */ 3814 if (!pcbddc->symmetric_primal) { 3815 Mat B_V=NULL,B_C=NULL; 3816 PetscScalar *marray; 3817 3818 if (n_constraints) { 3819 Mat S_CCT,C_CRT; 3820 3821 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 3822 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 3823 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 3824 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 3825 if (n_vertices) { 3826 Mat S_VCT; 3827 3828 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 3829 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 3830 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 3831 } 3832 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 3833 } else { 3834 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 3835 } 3836 if (n_vertices && n_R) { 3837 PetscScalar *av,*marray; 3838 const PetscInt *xadj,*adjncy; 3839 PetscInt n; 3840 PetscBool flg_row; 3841 3842 /* B_V = B_V - A_VR^T */ 3843 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3844 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3845 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 3846 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3847 for (i=0;i<n;i++) { 3848 PetscInt j; 3849 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 3850 } 3851 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3852 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3853 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3854 } 3855 3856 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 3857 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3858 for (i=0;i<n_vertices;i++) { 3859 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 3860 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3861 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3862 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3863 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3864 } 3865 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3866 if (B_C) { 3867 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 3868 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 3869 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 3870 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3871 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3872 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3873 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3874 } 3875 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 3876 } 3877 /* coarse basis functions */ 3878 for (i=0;i<pcbddc->local_primal_size;i++) { 3879 PetscScalar *y; 3880 3881 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 3882 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3883 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3884 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3885 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3886 if (i<n_vertices) { 3887 y[n_B*i+idx_V_B[i]] = 1.0; 3888 } 3889 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3890 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3891 3892 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3893 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3894 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3895 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3896 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3897 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3898 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3899 } 3900 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3901 } 3902 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 3903 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 3904 } 3905 /* free memory */ 3906 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3907 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 3908 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 3909 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 3910 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 3911 ierr = PetscFree(work);CHKERRQ(ierr); 3912 if (n_vertices) { 3913 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3914 } 3915 if (n_constraints) { 3916 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3917 } 3918 /* Checking coarse_sub_mat and coarse basis functios */ 3919 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3920 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3921 if (pcbddc->dbg_flag) { 3922 Mat coarse_sub_mat; 3923 Mat AUXMAT,TM1,TM2,TM3,TM4; 3924 Mat coarse_phi_D,coarse_phi_B; 3925 Mat coarse_psi_D,coarse_psi_B; 3926 Mat A_II,A_BB,A_IB,A_BI; 3927 Mat C_B,CPHI; 3928 IS is_dummy; 3929 Vec mones; 3930 MatType checkmattype=MATSEQAIJ; 3931 PetscReal real_value; 3932 3933 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3934 Mat A; 3935 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 3936 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3937 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3938 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3939 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3940 ierr = MatDestroy(&A);CHKERRQ(ierr); 3941 } else { 3942 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3943 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3944 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3945 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3946 } 3947 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3948 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3949 if (!pcbddc->symmetric_primal) { 3950 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 3951 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 3952 } 3953 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3954 3955 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3956 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 3957 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3958 if (!pcbddc->symmetric_primal) { 3959 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3960 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3961 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3962 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3963 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3964 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3965 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3966 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3967 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3968 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3969 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3970 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3971 } else { 3972 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3973 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3974 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3975 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3976 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3977 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3978 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3979 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3980 } 3981 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3982 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3983 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3984 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 3985 if (pcbddc->benign_n) { 3986 Mat B0_B,B0_BPHI; 3987 PetscScalar *data,*data2; 3988 PetscInt j; 3989 3990 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3991 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3992 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3993 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3994 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 3995 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 3996 for (j=0;j<pcbddc->benign_n;j++) { 3997 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3998 for (i=0;i<pcbddc->local_primal_size;i++) { 3999 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4000 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4001 } 4002 } 4003 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4004 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4005 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4006 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4007 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4008 } 4009 #if 0 4010 { 4011 PetscViewer viewer; 4012 char filename[256]; 4013 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4014 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4015 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4016 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4017 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4018 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4019 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4020 if (save_change) { 4021 Mat phi_B; 4022 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 4023 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 4024 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 4025 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 4026 } else { 4027 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4028 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4029 } 4030 if (pcbddc->coarse_phi_D) { 4031 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4032 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4033 } 4034 if (pcbddc->coarse_psi_B) { 4035 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4036 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4037 } 4038 if (pcbddc->coarse_psi_D) { 4039 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4040 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4041 } 4042 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4043 } 4044 #endif 4045 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4046 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4047 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4048 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4049 4050 /* check constraints */ 4051 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4052 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4053 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4054 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4055 } else { 4056 PetscScalar *data; 4057 Mat tmat; 4058 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4059 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4060 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4061 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4062 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4063 } 4064 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4065 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4066 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4067 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4068 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4069 if (!pcbddc->symmetric_primal) { 4070 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4071 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4072 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4073 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4074 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4075 } 4076 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4077 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4078 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4079 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4080 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4081 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4082 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4083 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4084 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4085 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4086 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4087 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4088 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4089 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4090 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4091 if (!pcbddc->symmetric_primal) { 4092 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4093 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4094 } 4095 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4096 } 4097 /* get back data */ 4098 *coarse_submat_vals_n = coarse_submat_vals; 4099 PetscFunctionReturn(0); 4100 } 4101 4102 #undef __FUNCT__ 4103 #define __FUNCT__ "MatGetSubMatrixUnsorted" 4104 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4105 { 4106 Mat *work_mat; 4107 IS isrow_s,iscol_s; 4108 PetscBool rsorted,csorted; 4109 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4110 PetscErrorCode ierr; 4111 4112 PetscFunctionBegin; 4113 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4114 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4115 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4116 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4117 4118 if (!rsorted) { 4119 const PetscInt *idxs; 4120 PetscInt *idxs_sorted,i; 4121 4122 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4123 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4124 for (i=0;i<rsize;i++) { 4125 idxs_perm_r[i] = i; 4126 } 4127 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4128 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4129 for (i=0;i<rsize;i++) { 4130 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4131 } 4132 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4133 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4134 } else { 4135 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4136 isrow_s = isrow; 4137 } 4138 4139 if (!csorted) { 4140 if (isrow == iscol) { 4141 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4142 iscol_s = isrow_s; 4143 } else { 4144 const PetscInt *idxs; 4145 PetscInt *idxs_sorted,i; 4146 4147 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4148 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4149 for (i=0;i<csize;i++) { 4150 idxs_perm_c[i] = i; 4151 } 4152 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4153 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4154 for (i=0;i<csize;i++) { 4155 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4156 } 4157 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4158 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4159 } 4160 } else { 4161 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4162 iscol_s = iscol; 4163 } 4164 4165 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4166 4167 if (!rsorted || !csorted) { 4168 Mat new_mat; 4169 IS is_perm_r,is_perm_c; 4170 4171 if (!rsorted) { 4172 PetscInt *idxs_r,i; 4173 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4174 for (i=0;i<rsize;i++) { 4175 idxs_r[idxs_perm_r[i]] = i; 4176 } 4177 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4178 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4179 } else { 4180 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4181 } 4182 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4183 4184 if (!csorted) { 4185 if (isrow_s == iscol_s) { 4186 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4187 is_perm_c = is_perm_r; 4188 } else { 4189 PetscInt *idxs_c,i; 4190 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4191 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4192 for (i=0;i<csize;i++) { 4193 idxs_c[idxs_perm_c[i]] = i; 4194 } 4195 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4196 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4197 } 4198 } else { 4199 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4200 } 4201 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4202 4203 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4204 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4205 work_mat[0] = new_mat; 4206 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4207 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4208 } 4209 4210 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4211 *B = work_mat[0]; 4212 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4213 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4214 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4215 PetscFunctionReturn(0); 4216 } 4217 4218 #undef __FUNCT__ 4219 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 4220 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4221 { 4222 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4223 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4224 Mat new_mat; 4225 IS is_local,is_global; 4226 PetscInt local_size; 4227 PetscBool isseqaij; 4228 PetscErrorCode ierr; 4229 4230 PetscFunctionBegin; 4231 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4232 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4233 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4234 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4235 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4236 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4237 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4238 4239 /* check */ 4240 if (pcbddc->dbg_flag) { 4241 Vec x,x_change; 4242 PetscReal error; 4243 4244 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4245 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4246 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4247 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4248 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4249 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4250 if (!pcbddc->change_interior) { 4251 const PetscScalar *x,*y,*v; 4252 PetscReal lerror = 0.; 4253 PetscInt i; 4254 4255 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4256 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4257 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4258 for (i=0;i<local_size;i++) 4259 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4260 lerror = PetscAbsScalar(x[i]-y[i]); 4261 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4262 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4263 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4264 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4265 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on I: %1.6e\n",error);CHKERRQ(ierr); 4266 } 4267 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4268 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4269 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4270 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4271 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4272 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 4273 ierr = VecDestroy(&x);CHKERRQ(ierr); 4274 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4275 } 4276 4277 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4278 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4279 if (isseqaij) { 4280 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4281 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4282 } else { 4283 Mat work_mat; 4284 4285 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4286 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4287 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4288 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4289 } 4290 if (matis->A->symmetric_set) { 4291 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4292 #if !defined(PETSC_USE_COMPLEX) 4293 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4294 #endif 4295 } 4296 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4297 PetscFunctionReturn(0); 4298 } 4299 4300 #undef __FUNCT__ 4301 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 4302 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4303 { 4304 PC_IS* pcis = (PC_IS*)(pc->data); 4305 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4306 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4307 PetscInt *idx_R_local=NULL; 4308 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4309 PetscInt vbs,bs; 4310 PetscBT bitmask=NULL; 4311 PetscErrorCode ierr; 4312 4313 PetscFunctionBegin; 4314 /* 4315 No need to setup local scatters if 4316 - primal space is unchanged 4317 AND 4318 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4319 AND 4320 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4321 */ 4322 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4323 PetscFunctionReturn(0); 4324 } 4325 /* destroy old objects */ 4326 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4327 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4328 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4329 /* Set Non-overlapping dimensions */ 4330 n_B = pcis->n_B; 4331 n_D = pcis->n - n_B; 4332 n_vertices = pcbddc->n_vertices; 4333 4334 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4335 4336 /* create auxiliary bitmask and allocate workspace */ 4337 if (!sub_schurs || !sub_schurs->reuse_solver) { 4338 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4339 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4340 for (i=0;i<n_vertices;i++) { 4341 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4342 } 4343 4344 for (i=0, n_R=0; i<pcis->n; i++) { 4345 if (!PetscBTLookup(bitmask,i)) { 4346 idx_R_local[n_R++] = i; 4347 } 4348 } 4349 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4350 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4351 4352 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4353 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4354 } 4355 4356 /* Block code */ 4357 vbs = 1; 4358 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4359 if (bs>1 && !(n_vertices%bs)) { 4360 PetscBool is_blocked = PETSC_TRUE; 4361 PetscInt *vary; 4362 if (!sub_schurs || !sub_schurs->reuse_solver) { 4363 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4364 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4365 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4366 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4367 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4368 for (i=0; i<pcis->n/bs; i++) { 4369 if (vary[i]!=0 && vary[i]!=bs) { 4370 is_blocked = PETSC_FALSE; 4371 break; 4372 } 4373 } 4374 ierr = PetscFree(vary);CHKERRQ(ierr); 4375 } else { 4376 /* Verify directly the R set */ 4377 for (i=0; i<n_R/bs; i++) { 4378 PetscInt j,node=idx_R_local[bs*i]; 4379 for (j=1; j<bs; j++) { 4380 if (node != idx_R_local[bs*i+j]-j) { 4381 is_blocked = PETSC_FALSE; 4382 break; 4383 } 4384 } 4385 } 4386 } 4387 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4388 vbs = bs; 4389 for (i=0;i<n_R/vbs;i++) { 4390 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4391 } 4392 } 4393 } 4394 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4395 if (sub_schurs && sub_schurs->reuse_solver) { 4396 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4397 4398 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4399 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4400 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4401 reuse_solver->is_R = pcbddc->is_R_local; 4402 } else { 4403 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4404 } 4405 4406 /* print some info if requested */ 4407 if (pcbddc->dbg_flag) { 4408 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4409 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4410 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4411 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4412 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4413 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4414 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4415 } 4416 4417 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4418 if (!sub_schurs || !sub_schurs->reuse_solver) { 4419 IS is_aux1,is_aux2; 4420 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4421 4422 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4423 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4424 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4425 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4426 for (i=0; i<n_D; i++) { 4427 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4428 } 4429 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4430 for (i=0, j=0; i<n_R; i++) { 4431 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4432 aux_array1[j++] = i; 4433 } 4434 } 4435 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4436 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4437 for (i=0, j=0; i<n_B; i++) { 4438 if (!PetscBTLookup(bitmask,is_indices[i])) { 4439 aux_array2[j++] = i; 4440 } 4441 } 4442 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4443 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4444 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4445 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4446 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4447 4448 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4449 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4450 for (i=0, j=0; i<n_R; i++) { 4451 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4452 aux_array1[j++] = i; 4453 } 4454 } 4455 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4456 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4457 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4458 } 4459 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4460 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4461 } else { 4462 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4463 IS tis; 4464 PetscInt schur_size; 4465 4466 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4467 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4468 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4469 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4470 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4471 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4472 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4473 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4474 } 4475 } 4476 PetscFunctionReturn(0); 4477 } 4478 4479 4480 #undef __FUNCT__ 4481 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 4482 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4483 { 4484 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4485 PC_IS *pcis = (PC_IS*)pc->data; 4486 PC pc_temp; 4487 Mat A_RR; 4488 MatReuse reuse; 4489 PetscScalar m_one = -1.0; 4490 PetscReal value; 4491 PetscInt n_D,n_R; 4492 PetscBool check_corr[2],issbaij; 4493 PetscErrorCode ierr; 4494 /* prefixes stuff */ 4495 char dir_prefix[256],neu_prefix[256],str_level[16]; 4496 size_t len; 4497 4498 PetscFunctionBegin; 4499 4500 /* compute prefixes */ 4501 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4502 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4503 if (!pcbddc->current_level) { 4504 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4505 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4506 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4507 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4508 } else { 4509 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4510 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4511 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4512 len -= 15; /* remove "pc_bddc_coarse_" */ 4513 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4514 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4515 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4516 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4517 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4518 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4519 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4520 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4521 } 4522 4523 /* DIRICHLET PROBLEM */ 4524 if (dirichlet) { 4525 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4526 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4527 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4528 if (pcbddc->dbg_flag) { 4529 Mat A_IIn; 4530 4531 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4532 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4533 pcis->A_II = A_IIn; 4534 } 4535 } 4536 if (pcbddc->local_mat->symmetric_set) { 4537 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4538 } 4539 /* Matrix for Dirichlet problem is pcis->A_II */ 4540 n_D = pcis->n - pcis->n_B; 4541 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4542 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4543 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4544 /* default */ 4545 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4546 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4547 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4548 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4549 if (issbaij) { 4550 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4551 } else { 4552 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4553 } 4554 /* Allow user's customization */ 4555 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4556 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4557 } 4558 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4559 if (sub_schurs && sub_schurs->reuse_solver) { 4560 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4561 4562 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4563 } 4564 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4565 if (!n_D) { 4566 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4567 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4568 } 4569 /* Set Up KSP for Dirichlet problem of BDDC */ 4570 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4571 /* set ksp_D into pcis data */ 4572 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4573 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4574 pcis->ksp_D = pcbddc->ksp_D; 4575 } 4576 4577 /* NEUMANN PROBLEM */ 4578 A_RR = 0; 4579 if (neumann) { 4580 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4581 PetscInt ibs,mbs; 4582 PetscBool issbaij; 4583 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4584 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4585 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4586 if (pcbddc->ksp_R) { /* already created ksp */ 4587 PetscInt nn_R; 4588 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4589 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4590 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4591 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4592 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4593 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4594 reuse = MAT_INITIAL_MATRIX; 4595 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4596 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4597 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4598 reuse = MAT_INITIAL_MATRIX; 4599 } else { /* safe to reuse the matrix */ 4600 reuse = MAT_REUSE_MATRIX; 4601 } 4602 } 4603 /* last check */ 4604 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4605 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4606 reuse = MAT_INITIAL_MATRIX; 4607 } 4608 } else { /* first time, so we need to create the matrix */ 4609 reuse = MAT_INITIAL_MATRIX; 4610 } 4611 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4612 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4613 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4614 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4615 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4616 if (matis->A == pcbddc->local_mat) { 4617 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4618 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4619 } else { 4620 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4621 } 4622 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4623 if (matis->A == pcbddc->local_mat) { 4624 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4625 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4626 } else { 4627 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4628 } 4629 } 4630 /* extract A_RR */ 4631 if (sub_schurs && sub_schurs->reuse_solver) { 4632 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4633 4634 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4635 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4636 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4637 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4638 } else { 4639 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4640 } 4641 } else { 4642 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4643 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4644 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4645 } 4646 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4647 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4648 } 4649 if (pcbddc->local_mat->symmetric_set) { 4650 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4651 } 4652 if (!pcbddc->ksp_R) { /* create object if not present */ 4653 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4654 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4655 /* default */ 4656 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4657 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4658 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4659 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4660 if (issbaij) { 4661 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4662 } else { 4663 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4664 } 4665 /* Allow user's customization */ 4666 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4667 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4668 } 4669 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4670 if (!n_R) { 4671 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4672 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4673 } 4674 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4675 /* Reuse solver if it is present */ 4676 if (sub_schurs && sub_schurs->reuse_solver) { 4677 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4678 4679 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4680 } 4681 /* Set Up KSP for Neumann problem of BDDC */ 4682 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4683 } 4684 4685 if (pcbddc->dbg_flag) { 4686 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4687 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4688 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4689 } 4690 4691 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4692 check_corr[0] = check_corr[1] = PETSC_FALSE; 4693 if (pcbddc->NullSpace_corr[0]) { 4694 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4695 } 4696 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4697 check_corr[0] = PETSC_TRUE; 4698 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4699 } 4700 if (neumann && pcbddc->NullSpace_corr[2]) { 4701 check_corr[1] = PETSC_TRUE; 4702 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4703 } 4704 4705 /* check Dirichlet and Neumann solvers */ 4706 if (pcbddc->dbg_flag) { 4707 if (dirichlet) { /* Dirichlet */ 4708 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4709 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4710 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4711 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4712 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4713 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4714 if (check_corr[0]) { 4715 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4716 } 4717 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4718 } 4719 if (neumann) { /* Neumann */ 4720 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4721 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4722 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4723 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4724 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4725 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4726 if (check_corr[1]) { 4727 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4728 } 4729 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4730 } 4731 } 4732 /* free Neumann problem's matrix */ 4733 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4734 PetscFunctionReturn(0); 4735 } 4736 4737 #undef __FUNCT__ 4738 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 4739 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4740 { 4741 PetscErrorCode ierr; 4742 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4743 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4744 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4745 4746 PetscFunctionBegin; 4747 if (!reuse_solver) { 4748 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4749 } 4750 if (!pcbddc->switch_static) { 4751 if (applytranspose && pcbddc->local_auxmat1) { 4752 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4753 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4754 } 4755 if (!reuse_solver) { 4756 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4757 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4758 } else { 4759 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4760 4761 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4762 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4763 } 4764 } else { 4765 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4766 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4767 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4768 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4769 if (applytranspose && pcbddc->local_auxmat1) { 4770 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4771 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4772 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4773 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4774 } 4775 } 4776 if (!reuse_solver || pcbddc->switch_static) { 4777 if (applytranspose) { 4778 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4779 } else { 4780 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4781 } 4782 } else { 4783 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4784 4785 if (applytranspose) { 4786 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4787 } else { 4788 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4789 } 4790 } 4791 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4792 if (!pcbddc->switch_static) { 4793 if (!reuse_solver) { 4794 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4795 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4796 } else { 4797 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4798 4799 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4800 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4801 } 4802 if (!applytranspose && pcbddc->local_auxmat1) { 4803 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4804 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4805 } 4806 } else { 4807 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4808 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4809 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4810 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4811 if (!applytranspose && pcbddc->local_auxmat1) { 4812 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4813 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4814 } 4815 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4816 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4817 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4818 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4819 } 4820 PetscFunctionReturn(0); 4821 } 4822 4823 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 4824 #undef __FUNCT__ 4825 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 4826 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 4827 { 4828 PetscErrorCode ierr; 4829 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4830 PC_IS* pcis = (PC_IS*) (pc->data); 4831 const PetscScalar zero = 0.0; 4832 4833 PetscFunctionBegin; 4834 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 4835 if (!pcbddc->benign_apply_coarse_only) { 4836 if (applytranspose) { 4837 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4838 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4839 } else { 4840 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4841 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4842 } 4843 } else { 4844 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 4845 } 4846 4847 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 4848 if (pcbddc->benign_n) { 4849 PetscScalar *array; 4850 PetscInt j; 4851 4852 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4853 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 4854 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4855 } 4856 4857 /* start communications from local primal nodes to rhs of coarse solver */ 4858 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 4859 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4860 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4861 4862 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 4863 if (pcbddc->coarse_ksp) { 4864 Mat coarse_mat; 4865 Vec rhs,sol; 4866 MatNullSpace nullsp; 4867 PetscBool isbddc = PETSC_FALSE; 4868 4869 if (pcbddc->benign_have_null) { 4870 PC coarse_pc; 4871 4872 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4873 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4874 /* we need to propagate to coarser levels the need for a possible benign correction */ 4875 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 4876 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4877 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 4878 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 4879 } 4880 } 4881 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 4882 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 4883 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4884 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 4885 if (nullsp) { 4886 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 4887 } 4888 if (applytranspose) { 4889 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 4890 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4891 } else { 4892 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 4893 PC coarse_pc; 4894 4895 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4896 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4897 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 4898 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4899 } else { 4900 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4901 } 4902 } 4903 /* we don't need the benign correction at coarser levels anymore */ 4904 if (pcbddc->benign_have_null && isbddc) { 4905 PC coarse_pc; 4906 PC_BDDC* coarsepcbddc; 4907 4908 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4909 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4910 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 4911 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 4912 } 4913 if (nullsp) { 4914 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 4915 } 4916 } 4917 4918 /* Local solution on R nodes */ 4919 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 4920 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 4921 } 4922 /* communications from coarse sol to local primal nodes */ 4923 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4924 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4925 4926 /* Sum contributions from the two levels */ 4927 if (!pcbddc->benign_apply_coarse_only) { 4928 if (applytranspose) { 4929 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4930 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4931 } else { 4932 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4933 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4934 } 4935 /* store p0 */ 4936 if (pcbddc->benign_n) { 4937 PetscScalar *array; 4938 PetscInt j; 4939 4940 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4941 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 4942 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4943 } 4944 } else { /* expand the coarse solution */ 4945 if (applytranspose) { 4946 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4947 } else { 4948 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4949 } 4950 } 4951 PetscFunctionReturn(0); 4952 } 4953 4954 #undef __FUNCT__ 4955 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 4956 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 4957 { 4958 PetscErrorCode ierr; 4959 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4960 PetscScalar *array; 4961 Vec from,to; 4962 4963 PetscFunctionBegin; 4964 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4965 from = pcbddc->coarse_vec; 4966 to = pcbddc->vec1_P; 4967 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4968 Vec tvec; 4969 4970 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4971 ierr = VecResetArray(tvec);CHKERRQ(ierr); 4972 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4973 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 4974 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 4975 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 4976 } 4977 } else { /* from local to global -> put data in coarse right hand side */ 4978 from = pcbddc->vec1_P; 4979 to = pcbddc->coarse_vec; 4980 } 4981 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4982 PetscFunctionReturn(0); 4983 } 4984 4985 #undef __FUNCT__ 4986 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 4987 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 4988 { 4989 PetscErrorCode ierr; 4990 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4991 PetscScalar *array; 4992 Vec from,to; 4993 4994 PetscFunctionBegin; 4995 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4996 from = pcbddc->coarse_vec; 4997 to = pcbddc->vec1_P; 4998 } else { /* from local to global -> put data in coarse right hand side */ 4999 from = pcbddc->vec1_P; 5000 to = pcbddc->coarse_vec; 5001 } 5002 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5003 if (smode == SCATTER_FORWARD) { 5004 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5005 Vec tvec; 5006 5007 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5008 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5009 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5010 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5011 } 5012 } else { 5013 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5014 ierr = VecResetArray(from);CHKERRQ(ierr); 5015 } 5016 } 5017 PetscFunctionReturn(0); 5018 } 5019 5020 /* uncomment for testing purposes */ 5021 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5022 #undef __FUNCT__ 5023 #define __FUNCT__ "PCBDDCConstraintsSetUp" 5024 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5025 { 5026 PetscErrorCode ierr; 5027 PC_IS* pcis = (PC_IS*)(pc->data); 5028 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5029 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5030 /* one and zero */ 5031 PetscScalar one=1.0,zero=0.0; 5032 /* space to store constraints and their local indices */ 5033 PetscScalar *constraints_data; 5034 PetscInt *constraints_idxs,*constraints_idxs_B; 5035 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5036 PetscInt *constraints_n; 5037 /* iterators */ 5038 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5039 /* BLAS integers */ 5040 PetscBLASInt lwork,lierr; 5041 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5042 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5043 /* reuse */ 5044 PetscInt olocal_primal_size,olocal_primal_size_cc; 5045 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5046 /* change of basis */ 5047 PetscBool qr_needed; 5048 PetscBT change_basis,qr_needed_idx; 5049 /* auxiliary stuff */ 5050 PetscInt *nnz,*is_indices; 5051 PetscInt ncc; 5052 /* some quantities */ 5053 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5054 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5055 5056 PetscFunctionBegin; 5057 /* Destroy Mat objects computed previously */ 5058 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5059 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5060 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5061 /* save info on constraints from previous setup (if any) */ 5062 olocal_primal_size = pcbddc->local_primal_size; 5063 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5064 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5065 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5066 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5067 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5068 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5069 5070 if (!pcbddc->adaptive_selection) { 5071 IS ISForVertices,*ISForFaces,*ISForEdges; 5072 MatNullSpace nearnullsp; 5073 const Vec *nearnullvecs; 5074 Vec *localnearnullsp; 5075 PetscScalar *array; 5076 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5077 PetscBool nnsp_has_cnst; 5078 /* LAPACK working arrays for SVD or POD */ 5079 PetscBool skip_lapack,boolforchange; 5080 PetscScalar *work; 5081 PetscReal *singular_vals; 5082 #if defined(PETSC_USE_COMPLEX) 5083 PetscReal *rwork; 5084 #endif 5085 #if defined(PETSC_MISSING_LAPACK_GESVD) 5086 PetscScalar *temp_basis,*correlation_mat; 5087 #else 5088 PetscBLASInt dummy_int=1; 5089 PetscScalar dummy_scalar=1.; 5090 #endif 5091 5092 /* Get index sets for faces, edges and vertices from graph */ 5093 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5094 /* print some info */ 5095 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5096 PetscInt nv; 5097 5098 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5099 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5100 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5101 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5102 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5103 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5104 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5105 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5106 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5107 } 5108 5109 /* free unneeded index sets */ 5110 if (!pcbddc->use_vertices) { 5111 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5112 } 5113 if (!pcbddc->use_edges) { 5114 for (i=0;i<n_ISForEdges;i++) { 5115 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5116 } 5117 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5118 n_ISForEdges = 0; 5119 } 5120 if (!pcbddc->use_faces) { 5121 for (i=0;i<n_ISForFaces;i++) { 5122 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5123 } 5124 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5125 n_ISForFaces = 0; 5126 } 5127 5128 /* check if near null space is attached to global mat */ 5129 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5130 if (nearnullsp) { 5131 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5132 /* remove any stored info */ 5133 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5134 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5135 /* store information for BDDC solver reuse */ 5136 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5137 pcbddc->onearnullspace = nearnullsp; 5138 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5139 for (i=0;i<nnsp_size;i++) { 5140 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5141 } 5142 } else { /* if near null space is not provided BDDC uses constants by default */ 5143 nnsp_size = 0; 5144 nnsp_has_cnst = PETSC_TRUE; 5145 } 5146 /* get max number of constraints on a single cc */ 5147 max_constraints = nnsp_size; 5148 if (nnsp_has_cnst) max_constraints++; 5149 5150 /* 5151 Evaluate maximum storage size needed by the procedure 5152 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5153 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5154 There can be multiple constraints per connected component 5155 */ 5156 n_vertices = 0; 5157 if (ISForVertices) { 5158 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5159 } 5160 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5161 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5162 5163 total_counts = n_ISForFaces+n_ISForEdges; 5164 total_counts *= max_constraints; 5165 total_counts += n_vertices; 5166 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5167 5168 total_counts = 0; 5169 max_size_of_constraint = 0; 5170 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5171 IS used_is; 5172 if (i<n_ISForEdges) { 5173 used_is = ISForEdges[i]; 5174 } else { 5175 used_is = ISForFaces[i-n_ISForEdges]; 5176 } 5177 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5178 total_counts += j; 5179 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5180 } 5181 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5182 5183 /* get local part of global near null space vectors */ 5184 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5185 for (k=0;k<nnsp_size;k++) { 5186 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5187 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5188 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5189 } 5190 5191 /* whether or not to skip lapack calls */ 5192 skip_lapack = PETSC_TRUE; 5193 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5194 5195 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5196 if (!skip_lapack) { 5197 PetscScalar temp_work; 5198 5199 #if defined(PETSC_MISSING_LAPACK_GESVD) 5200 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5201 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5202 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5203 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5204 #if defined(PETSC_USE_COMPLEX) 5205 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5206 #endif 5207 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5208 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5209 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5210 lwork = -1; 5211 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5212 #if !defined(PETSC_USE_COMPLEX) 5213 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5214 #else 5215 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5216 #endif 5217 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5218 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5219 #else /* on missing GESVD */ 5220 /* SVD */ 5221 PetscInt max_n,min_n; 5222 max_n = max_size_of_constraint; 5223 min_n = max_constraints; 5224 if (max_size_of_constraint < max_constraints) { 5225 min_n = max_size_of_constraint; 5226 max_n = max_constraints; 5227 } 5228 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5229 #if defined(PETSC_USE_COMPLEX) 5230 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5231 #endif 5232 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5233 lwork = -1; 5234 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5235 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5236 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5237 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5238 #if !defined(PETSC_USE_COMPLEX) 5239 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5240 #else 5241 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5242 #endif 5243 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5244 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5245 #endif /* on missing GESVD */ 5246 /* Allocate optimal workspace */ 5247 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5248 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5249 } 5250 /* Now we can loop on constraining sets */ 5251 total_counts = 0; 5252 constraints_idxs_ptr[0] = 0; 5253 constraints_data_ptr[0] = 0; 5254 /* vertices */ 5255 if (n_vertices) { 5256 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5257 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5258 for (i=0;i<n_vertices;i++) { 5259 constraints_n[total_counts] = 1; 5260 constraints_data[total_counts] = 1.0; 5261 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5262 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5263 total_counts++; 5264 } 5265 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5266 n_vertices = total_counts; 5267 } 5268 5269 /* edges and faces */ 5270 total_counts_cc = total_counts; 5271 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5272 IS used_is; 5273 PetscBool idxs_copied = PETSC_FALSE; 5274 5275 if (ncc<n_ISForEdges) { 5276 used_is = ISForEdges[ncc]; 5277 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5278 } else { 5279 used_is = ISForFaces[ncc-n_ISForEdges]; 5280 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5281 } 5282 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5283 5284 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5285 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5286 /* change of basis should not be performed on local periodic nodes */ 5287 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5288 if (nnsp_has_cnst) { 5289 PetscScalar quad_value; 5290 5291 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5292 idxs_copied = PETSC_TRUE; 5293 5294 if (!pcbddc->use_nnsp_true) { 5295 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5296 } else { 5297 quad_value = 1.0; 5298 } 5299 for (j=0;j<size_of_constraint;j++) { 5300 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5301 } 5302 temp_constraints++; 5303 total_counts++; 5304 } 5305 for (k=0;k<nnsp_size;k++) { 5306 PetscReal real_value; 5307 PetscScalar *ptr_to_data; 5308 5309 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5310 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5311 for (j=0;j<size_of_constraint;j++) { 5312 ptr_to_data[j] = array[is_indices[j]]; 5313 } 5314 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5315 /* check if array is null on the connected component */ 5316 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5317 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5318 if (real_value > 0.0) { /* keep indices and values */ 5319 temp_constraints++; 5320 total_counts++; 5321 if (!idxs_copied) { 5322 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5323 idxs_copied = PETSC_TRUE; 5324 } 5325 } 5326 } 5327 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5328 valid_constraints = temp_constraints; 5329 if (!pcbddc->use_nnsp_true && temp_constraints) { 5330 if (temp_constraints == 1) { /* just normalize the constraint */ 5331 PetscScalar norm,*ptr_to_data; 5332 5333 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5334 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5335 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5336 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5337 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5338 } else { /* perform SVD */ 5339 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5340 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5341 5342 #if defined(PETSC_MISSING_LAPACK_GESVD) 5343 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5344 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5345 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5346 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5347 from that computed using LAPACKgesvd 5348 -> This is due to a different computation of eigenvectors in LAPACKheev 5349 -> The quality of the POD-computed basis will be the same */ 5350 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5351 /* Store upper triangular part of correlation matrix */ 5352 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5353 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5354 for (j=0;j<temp_constraints;j++) { 5355 for (k=0;k<j+1;k++) { 5356 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5357 } 5358 } 5359 /* compute eigenvalues and eigenvectors of correlation matrix */ 5360 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5361 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5362 #if !defined(PETSC_USE_COMPLEX) 5363 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5364 #else 5365 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5366 #endif 5367 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5368 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5369 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5370 j = 0; 5371 while (j < temp_constraints && singular_vals[j] < tol) j++; 5372 total_counts = total_counts-j; 5373 valid_constraints = temp_constraints-j; 5374 /* scale and copy POD basis into used quadrature memory */ 5375 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5376 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5377 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5378 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5379 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5380 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5381 if (j<temp_constraints) { 5382 PetscInt ii; 5383 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5384 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5385 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5386 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5387 for (k=0;k<temp_constraints-j;k++) { 5388 for (ii=0;ii<size_of_constraint;ii++) { 5389 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5390 } 5391 } 5392 } 5393 #else /* on missing GESVD */ 5394 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5395 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5396 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5397 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5398 #if !defined(PETSC_USE_COMPLEX) 5399 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5400 #else 5401 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5402 #endif 5403 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5404 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5405 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5406 k = temp_constraints; 5407 if (k > size_of_constraint) k = size_of_constraint; 5408 j = 0; 5409 while (j < k && singular_vals[k-j-1] < tol) j++; 5410 valid_constraints = k-j; 5411 total_counts = total_counts-temp_constraints+valid_constraints; 5412 #endif /* on missing GESVD */ 5413 } 5414 } 5415 /* update pointers information */ 5416 if (valid_constraints) { 5417 constraints_n[total_counts_cc] = valid_constraints; 5418 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5419 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5420 /* set change_of_basis flag */ 5421 if (boolforchange) { 5422 PetscBTSet(change_basis,total_counts_cc); 5423 } 5424 total_counts_cc++; 5425 } 5426 } 5427 /* free workspace */ 5428 if (!skip_lapack) { 5429 ierr = PetscFree(work);CHKERRQ(ierr); 5430 #if defined(PETSC_USE_COMPLEX) 5431 ierr = PetscFree(rwork);CHKERRQ(ierr); 5432 #endif 5433 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5434 #if defined(PETSC_MISSING_LAPACK_GESVD) 5435 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5436 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5437 #endif 5438 } 5439 for (k=0;k<nnsp_size;k++) { 5440 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5441 } 5442 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5443 /* free index sets of faces, edges and vertices */ 5444 for (i=0;i<n_ISForFaces;i++) { 5445 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5446 } 5447 if (n_ISForFaces) { 5448 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5449 } 5450 for (i=0;i<n_ISForEdges;i++) { 5451 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5452 } 5453 if (n_ISForEdges) { 5454 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5455 } 5456 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5457 } else { 5458 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5459 5460 total_counts = 0; 5461 n_vertices = 0; 5462 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5463 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5464 } 5465 max_constraints = 0; 5466 total_counts_cc = 0; 5467 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5468 total_counts += pcbddc->adaptive_constraints_n[i]; 5469 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5470 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5471 } 5472 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5473 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5474 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5475 constraints_data = pcbddc->adaptive_constraints_data; 5476 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5477 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5478 total_counts_cc = 0; 5479 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5480 if (pcbddc->adaptive_constraints_n[i]) { 5481 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5482 } 5483 } 5484 #if 0 5485 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5486 for (i=0;i<total_counts_cc;i++) { 5487 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5488 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5489 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5490 printf(" %d",constraints_idxs[j]); 5491 } 5492 printf("\n"); 5493 printf("number of cc: %d\n",constraints_n[i]); 5494 } 5495 for (i=0;i<n_vertices;i++) { 5496 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5497 } 5498 for (i=0;i<sub_schurs->n_subs;i++) { 5499 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5500 } 5501 #endif 5502 5503 max_size_of_constraint = 0; 5504 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5505 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5506 /* Change of basis */ 5507 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5508 if (pcbddc->use_change_of_basis) { 5509 for (i=0;i<sub_schurs->n_subs;i++) { 5510 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5511 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5512 } 5513 } 5514 } 5515 } 5516 pcbddc->local_primal_size = total_counts; 5517 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5518 5519 /* map constraints_idxs in boundary numbering */ 5520 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5521 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5522 5523 /* Create constraint matrix */ 5524 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5525 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5526 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5527 5528 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5529 /* determine if a QR strategy is needed for change of basis */ 5530 qr_needed = PETSC_FALSE; 5531 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5532 total_primal_vertices=0; 5533 pcbddc->local_primal_size_cc = 0; 5534 for (i=0;i<total_counts_cc;i++) { 5535 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5536 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5537 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5538 pcbddc->local_primal_size_cc += 1; 5539 } else if (PetscBTLookup(change_basis,i)) { 5540 for (k=0;k<constraints_n[i];k++) { 5541 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5542 } 5543 pcbddc->local_primal_size_cc += constraints_n[i]; 5544 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5545 PetscBTSet(qr_needed_idx,i); 5546 qr_needed = PETSC_TRUE; 5547 } 5548 } else { 5549 pcbddc->local_primal_size_cc += 1; 5550 } 5551 } 5552 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5553 pcbddc->n_vertices = total_primal_vertices; 5554 /* permute indices in order to have a sorted set of vertices */ 5555 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5556 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5557 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5558 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5559 5560 /* nonzero structure of constraint matrix */ 5561 /* and get reference dof for local constraints */ 5562 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5563 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5564 5565 j = total_primal_vertices; 5566 total_counts = total_primal_vertices; 5567 cum = total_primal_vertices; 5568 for (i=n_vertices;i<total_counts_cc;i++) { 5569 if (!PetscBTLookup(change_basis,i)) { 5570 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5571 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5572 cum++; 5573 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5574 for (k=0;k<constraints_n[i];k++) { 5575 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5576 nnz[j+k] = size_of_constraint; 5577 } 5578 j += constraints_n[i]; 5579 } 5580 } 5581 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5582 ierr = PetscFree(nnz);CHKERRQ(ierr); 5583 5584 /* set values in constraint matrix */ 5585 for (i=0;i<total_primal_vertices;i++) { 5586 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5587 } 5588 total_counts = total_primal_vertices; 5589 for (i=n_vertices;i<total_counts_cc;i++) { 5590 if (!PetscBTLookup(change_basis,i)) { 5591 PetscInt *cols; 5592 5593 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5594 cols = constraints_idxs+constraints_idxs_ptr[i]; 5595 for (k=0;k<constraints_n[i];k++) { 5596 PetscInt row = total_counts+k; 5597 PetscScalar *vals; 5598 5599 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5600 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5601 } 5602 total_counts += constraints_n[i]; 5603 } 5604 } 5605 /* assembling */ 5606 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5607 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5608 5609 /* 5610 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5611 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5612 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5613 */ 5614 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5615 if (pcbddc->use_change_of_basis) { 5616 /* dual and primal dofs on a single cc */ 5617 PetscInt dual_dofs,primal_dofs; 5618 /* working stuff for GEQRF */ 5619 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5620 PetscBLASInt lqr_work; 5621 /* working stuff for UNGQR */ 5622 PetscScalar *gqr_work,lgqr_work_t; 5623 PetscBLASInt lgqr_work; 5624 /* working stuff for TRTRS */ 5625 PetscScalar *trs_rhs; 5626 PetscBLASInt Blas_NRHS; 5627 /* pointers for values insertion into change of basis matrix */ 5628 PetscInt *start_rows,*start_cols; 5629 PetscScalar *start_vals; 5630 /* working stuff for values insertion */ 5631 PetscBT is_primal; 5632 PetscInt *aux_primal_numbering_B; 5633 /* matrix sizes */ 5634 PetscInt global_size,local_size; 5635 /* temporary change of basis */ 5636 Mat localChangeOfBasisMatrix; 5637 /* extra space for debugging */ 5638 PetscScalar *dbg_work; 5639 5640 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5641 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5642 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5643 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5644 /* nonzeros for local mat */ 5645 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5646 if (!pcbddc->benign_change || pcbddc->fake_change) { 5647 for (i=0;i<pcis->n;i++) nnz[i]=1; 5648 } else { 5649 const PetscInt *ii; 5650 PetscInt n; 5651 PetscBool flg_row; 5652 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5653 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5654 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5655 } 5656 for (i=n_vertices;i<total_counts_cc;i++) { 5657 if (PetscBTLookup(change_basis,i)) { 5658 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5659 if (PetscBTLookup(qr_needed_idx,i)) { 5660 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5661 } else { 5662 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5663 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5664 } 5665 } 5666 } 5667 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5668 ierr = PetscFree(nnz);CHKERRQ(ierr); 5669 /* Set interior change in the matrix */ 5670 if (!pcbddc->benign_change || pcbddc->fake_change) { 5671 for (i=0;i<pcis->n;i++) { 5672 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5673 } 5674 } else { 5675 const PetscInt *ii,*jj; 5676 PetscScalar *aa; 5677 PetscInt n; 5678 PetscBool flg_row; 5679 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5680 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5681 for (i=0;i<n;i++) { 5682 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5683 } 5684 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5685 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5686 } 5687 5688 if (pcbddc->dbg_flag) { 5689 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5690 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5691 } 5692 5693 5694 /* Now we loop on the constraints which need a change of basis */ 5695 /* 5696 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5697 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5698 5699 Basic blocks of change of basis matrix T computed by 5700 5701 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5702 5703 | 1 0 ... 0 s_1/S | 5704 | 0 1 ... 0 s_2/S | 5705 | ... | 5706 | 0 ... 1 s_{n-1}/S | 5707 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5708 5709 with S = \sum_{i=1}^n s_i^2 5710 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5711 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5712 5713 - QR decomposition of constraints otherwise 5714 */ 5715 if (qr_needed) { 5716 /* space to store Q */ 5717 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5718 /* array to store scaling factors for reflectors */ 5719 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5720 /* first we issue queries for optimal work */ 5721 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5722 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5723 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5724 lqr_work = -1; 5725 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5726 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5727 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5728 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5729 lgqr_work = -1; 5730 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5731 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5732 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5733 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5734 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5735 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5736 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5737 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5738 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5739 /* array to store rhs and solution of triangular solver */ 5740 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5741 /* allocating workspace for check */ 5742 if (pcbddc->dbg_flag) { 5743 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5744 } 5745 } 5746 /* array to store whether a node is primal or not */ 5747 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5748 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5749 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5750 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5751 for (i=0;i<total_primal_vertices;i++) { 5752 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5753 } 5754 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5755 5756 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5757 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5758 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5759 if (PetscBTLookup(change_basis,total_counts)) { 5760 /* get constraint info */ 5761 primal_dofs = constraints_n[total_counts]; 5762 dual_dofs = size_of_constraint-primal_dofs; 5763 5764 if (pcbddc->dbg_flag) { 5765 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5766 } 5767 5768 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5769 5770 /* copy quadrature constraints for change of basis check */ 5771 if (pcbddc->dbg_flag) { 5772 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5773 } 5774 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5775 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5776 5777 /* compute QR decomposition of constraints */ 5778 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5779 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5780 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5781 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5782 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5783 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5784 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5785 5786 /* explictly compute R^-T */ 5787 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5788 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5789 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5790 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5791 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5792 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5793 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5794 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5795 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5796 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5797 5798 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5799 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5800 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5801 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5802 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5803 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5804 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 5805 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 5806 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5807 5808 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 5809 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 5810 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 5811 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5812 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5813 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5814 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5815 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5816 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5817 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5818 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 5819 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5820 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5821 5822 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 5823 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 5824 /* insert cols for primal dofs */ 5825 for (j=0;j<primal_dofs;j++) { 5826 start_vals = &qr_basis[j*size_of_constraint]; 5827 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5828 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5829 } 5830 /* insert cols for dual dofs */ 5831 for (j=0,k=0;j<dual_dofs;k++) { 5832 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 5833 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 5834 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5835 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5836 j++; 5837 } 5838 } 5839 5840 /* check change of basis */ 5841 if (pcbddc->dbg_flag) { 5842 PetscInt ii,jj; 5843 PetscBool valid_qr=PETSC_TRUE; 5844 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 5845 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5846 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 5847 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5848 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 5849 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 5850 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5851 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 5852 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5853 for (jj=0;jj<size_of_constraint;jj++) { 5854 for (ii=0;ii<primal_dofs;ii++) { 5855 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 5856 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 5857 } 5858 } 5859 if (!valid_qr) { 5860 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 5861 for (jj=0;jj<size_of_constraint;jj++) { 5862 for (ii=0;ii<primal_dofs;ii++) { 5863 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 5864 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5865 } 5866 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 5867 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5868 } 5869 } 5870 } 5871 } else { 5872 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 5873 } 5874 } 5875 } else { /* simple transformation block */ 5876 PetscInt row,col; 5877 PetscScalar val,norm; 5878 5879 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5880 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 5881 for (j=0;j<size_of_constraint;j++) { 5882 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 5883 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5884 if (!PetscBTLookup(is_primal,row_B)) { 5885 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 5886 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 5887 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 5888 } else { 5889 for (k=0;k<size_of_constraint;k++) { 5890 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5891 if (row != col) { 5892 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 5893 } else { 5894 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 5895 } 5896 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 5897 } 5898 } 5899 } 5900 if (pcbddc->dbg_flag) { 5901 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 5902 } 5903 } 5904 } else { 5905 if (pcbddc->dbg_flag) { 5906 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 5907 } 5908 } 5909 } 5910 5911 /* free workspace */ 5912 if (qr_needed) { 5913 if (pcbddc->dbg_flag) { 5914 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 5915 } 5916 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 5917 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 5918 ierr = PetscFree(qr_work);CHKERRQ(ierr); 5919 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 5920 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 5921 } 5922 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 5923 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5924 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5925 5926 /* assembling of global change of variable */ 5927 if (!pcbddc->fake_change) { 5928 Mat tmat; 5929 PetscInt bs; 5930 5931 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 5932 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 5933 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5934 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 5935 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5936 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5937 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 5938 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 5939 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 5940 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 5941 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5942 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5943 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5944 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5945 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5946 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5947 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5948 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 5949 5950 /* check */ 5951 if (pcbddc->dbg_flag) { 5952 PetscReal error; 5953 Vec x,x_change; 5954 5955 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 5956 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 5957 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 5958 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 5959 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5960 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5961 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 5962 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5963 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5964 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 5965 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 5966 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 5967 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5968 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 5969 ierr = VecDestroy(&x);CHKERRQ(ierr); 5970 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5971 } 5972 /* adapt sub_schurs computed (if any) */ 5973 if (pcbddc->use_deluxe_scaling) { 5974 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5975 5976 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 5977 if (sub_schurs && sub_schurs->S_Ej_all) { 5978 Mat S_new,tmat; 5979 IS is_all_N,is_V_Sall = NULL; 5980 5981 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 5982 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 5983 if (pcbddc->deluxe_zerorows) { 5984 ISLocalToGlobalMapping NtoSall; 5985 IS is_V; 5986 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 5987 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 5988 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 5989 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 5990 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 5991 } 5992 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 5993 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5994 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 5995 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5996 if (pcbddc->deluxe_zerorows) { 5997 const PetscScalar *array; 5998 const PetscInt *idxs_V,*idxs_all; 5999 PetscInt i,n_V; 6000 6001 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6002 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6003 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6004 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6005 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6006 for (i=0;i<n_V;i++) { 6007 PetscScalar val; 6008 PetscInt idx; 6009 6010 idx = idxs_V[i]; 6011 val = array[idxs_all[idxs_V[i]]]; 6012 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6013 } 6014 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6015 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6016 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6017 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6018 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6019 } 6020 sub_schurs->S_Ej_all = S_new; 6021 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6022 if (sub_schurs->sum_S_Ej_all) { 6023 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6024 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6025 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6026 if (pcbddc->deluxe_zerorows) { 6027 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6028 } 6029 sub_schurs->sum_S_Ej_all = S_new; 6030 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6031 } 6032 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6033 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6034 } 6035 /* destroy any change of basis context in sub_schurs */ 6036 if (sub_schurs && sub_schurs->change) { 6037 PetscInt i; 6038 6039 for (i=0;i<sub_schurs->n_subs;i++) { 6040 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6041 } 6042 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6043 } 6044 } 6045 if (pcbddc->switch_static) { /* need to save the local change */ 6046 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6047 } else { 6048 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6049 } 6050 /* determine if any process has changed the pressures locally */ 6051 pcbddc->change_interior = pcbddc->benign_have_null; 6052 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6053 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6054 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6055 pcbddc->use_qr_single = qr_needed; 6056 } 6057 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6058 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6059 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6060 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6061 } else { 6062 Mat benign_global = NULL; 6063 if (pcbddc->benign_have_null) { 6064 Mat tmat; 6065 6066 pcbddc->change_interior = PETSC_TRUE; 6067 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6068 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6069 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6070 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6071 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6072 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6073 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6074 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6075 if (pcbddc->benign_change) { 6076 Mat M; 6077 6078 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6079 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6080 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6081 ierr = MatDestroy(&M);CHKERRQ(ierr); 6082 } else { 6083 Mat eye; 6084 PetscScalar *array; 6085 6086 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6087 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6088 for (i=0;i<pcis->n;i++) { 6089 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6090 } 6091 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6092 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6093 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6094 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6095 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6096 } 6097 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6098 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6099 } 6100 if (pcbddc->user_ChangeOfBasisMatrix) { 6101 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6102 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6103 } else if (pcbddc->benign_have_null) { 6104 pcbddc->ChangeOfBasisMatrix = benign_global; 6105 } 6106 } 6107 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6108 IS is_global; 6109 const PetscInt *gidxs; 6110 6111 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6112 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6113 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6114 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6115 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6116 } 6117 } 6118 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6119 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6120 } 6121 6122 if (!pcbddc->fake_change) { 6123 /* add pressure dofs to set of primal nodes for numbering purposes */ 6124 for (i=0;i<pcbddc->benign_n;i++) { 6125 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6126 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6127 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6128 pcbddc->local_primal_size_cc++; 6129 pcbddc->local_primal_size++; 6130 } 6131 6132 /* check if a new primal space has been introduced (also take into account benign trick) */ 6133 pcbddc->new_primal_space_local = PETSC_TRUE; 6134 if (olocal_primal_size == pcbddc->local_primal_size) { 6135 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6136 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6137 if (!pcbddc->new_primal_space_local) { 6138 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6139 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6140 } 6141 } 6142 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6143 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6144 } 6145 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6146 6147 /* flush dbg viewer */ 6148 if (pcbddc->dbg_flag) { 6149 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6150 } 6151 6152 /* free workspace */ 6153 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6154 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6155 if (!pcbddc->adaptive_selection) { 6156 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6157 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6158 } else { 6159 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6160 pcbddc->adaptive_constraints_idxs_ptr, 6161 pcbddc->adaptive_constraints_data_ptr, 6162 pcbddc->adaptive_constraints_idxs, 6163 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6164 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6165 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6166 } 6167 PetscFunctionReturn(0); 6168 } 6169 6170 #undef __FUNCT__ 6171 #define __FUNCT__ "PCBDDCAnalyzeInterface" 6172 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6173 { 6174 ISLocalToGlobalMapping map; 6175 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6176 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6177 PetscInt ierr,i,N; 6178 6179 PetscFunctionBegin; 6180 if (pcbddc->recompute_topography) { 6181 pcbddc->graphanalyzed = PETSC_FALSE; 6182 /* Reset previously computed graph */ 6183 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6184 /* Init local Graph struct */ 6185 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6186 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6187 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6188 6189 /* Check validity of the csr graph passed in by the user */ 6190 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6191 6192 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6193 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 6194 PetscInt *xadj,*adjncy; 6195 PetscInt nvtxs; 6196 PetscBool flg_row=PETSC_FALSE; 6197 6198 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6199 if (flg_row) { 6200 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6201 pcbddc->computed_rowadj = PETSC_TRUE; 6202 } 6203 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6204 } 6205 if (pcbddc->dbg_flag) { 6206 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6207 } 6208 6209 /* Setup of Graph */ 6210 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6211 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6212 6213 /* attach info on disconnected subdomains if present */ 6214 if (pcbddc->n_local_subs) { 6215 PetscInt *local_subs; 6216 6217 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6218 for (i=0;i<pcbddc->n_local_subs;i++) { 6219 const PetscInt *idxs; 6220 PetscInt nl,j; 6221 6222 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6223 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6224 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6225 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6226 } 6227 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6228 pcbddc->mat_graph->local_subs = local_subs; 6229 } 6230 } 6231 6232 if (!pcbddc->graphanalyzed) { 6233 /* Graph's connected components analysis */ 6234 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6235 pcbddc->graphanalyzed = PETSC_TRUE; 6236 } 6237 PetscFunctionReturn(0); 6238 } 6239 6240 #undef __FUNCT__ 6241 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 6242 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6243 { 6244 PetscInt i,j; 6245 PetscScalar *alphas; 6246 PetscErrorCode ierr; 6247 6248 PetscFunctionBegin; 6249 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6250 for (i=0;i<n;i++) { 6251 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6252 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6253 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6254 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6255 } 6256 ierr = PetscFree(alphas);CHKERRQ(ierr); 6257 PetscFunctionReturn(0); 6258 } 6259 6260 #undef __FUNCT__ 6261 #define __FUNCT__ "MatISGetSubassemblingPattern" 6262 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6263 { 6264 Mat A; 6265 PetscInt n_neighs,*neighs,*n_shared,**shared; 6266 PetscMPIInt size,rank,color; 6267 PetscInt *xadj,*adjncy; 6268 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6269 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 6270 PetscInt void_procs,*procs_candidates = NULL; 6271 PetscInt xadj_count, *count; 6272 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6273 PetscSubcomm psubcomm; 6274 MPI_Comm subcomm; 6275 PetscErrorCode ierr; 6276 6277 PetscFunctionBegin; 6278 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6279 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6280 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6281 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6282 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6283 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6284 6285 if (have_void) *have_void = PETSC_FALSE; 6286 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6287 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6288 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6289 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6290 im_active = !!(n); 6291 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6292 void_procs = size - active_procs; 6293 /* get ranks of of non-active processes in mat communicator */ 6294 if (void_procs) { 6295 PetscInt ncand; 6296 6297 if (have_void) *have_void = PETSC_TRUE; 6298 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6299 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6300 for (i=0,ncand=0;i<size;i++) { 6301 if (!procs_candidates[i]) { 6302 procs_candidates[ncand++] = i; 6303 } 6304 } 6305 /* force n_subdomains to be not greater that the number of non-active processes */ 6306 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6307 } 6308 6309 /* number of subdomains requested greater than active processes -> just shift the matrix 6310 number of subdomains requested 1 -> send to master or first candidate in voids */ 6311 if (active_procs < *n_subdomains || *n_subdomains == 1) { 6312 PetscInt issize,isidx,dest; 6313 if (*n_subdomains == 1) dest = 0; 6314 else dest = rank; 6315 if (im_active) { 6316 issize = 1; 6317 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6318 isidx = procs_candidates[dest]; 6319 } else { 6320 isidx = dest; 6321 } 6322 } else { 6323 issize = 0; 6324 isidx = -1; 6325 } 6326 *n_subdomains = active_procs; 6327 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6328 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6329 PetscFunctionReturn(0); 6330 } 6331 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6332 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6333 threshold = PetscMax(threshold,2); 6334 6335 /* Get info on mapping */ 6336 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 6337 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6338 6339 /* build local CSR graph of subdomains' connectivity */ 6340 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6341 xadj[0] = 0; 6342 xadj[1] = PetscMax(n_neighs-1,0); 6343 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6344 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6345 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 6346 for (i=1;i<n_neighs;i++) 6347 for (j=0;j<n_shared[i];j++) 6348 count[shared[i][j]] += 1; 6349 6350 xadj_count = 0; 6351 for (i=1;i<n_neighs;i++) { 6352 for (j=0;j<n_shared[i];j++) { 6353 if (count[shared[i][j]] < threshold) { 6354 adjncy[xadj_count] = neighs[i]; 6355 adjncy_wgt[xadj_count] = n_shared[i]; 6356 xadj_count++; 6357 break; 6358 } 6359 } 6360 } 6361 xadj[1] = xadj_count; 6362 ierr = PetscFree(count);CHKERRQ(ierr); 6363 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6364 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6365 6366 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6367 6368 /* Restrict work on active processes only */ 6369 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6370 if (void_procs) { 6371 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6372 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6373 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6374 subcomm = PetscSubcommChild(psubcomm); 6375 } else { 6376 psubcomm = NULL; 6377 subcomm = PetscObjectComm((PetscObject)mat); 6378 } 6379 6380 v_wgt = NULL; 6381 if (!color) { 6382 ierr = PetscFree(xadj);CHKERRQ(ierr); 6383 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6384 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6385 } else { 6386 Mat subdomain_adj; 6387 IS new_ranks,new_ranks_contig; 6388 MatPartitioning partitioner; 6389 PetscInt rstart=0,rend=0; 6390 PetscInt *is_indices,*oldranks; 6391 PetscMPIInt size; 6392 PetscBool aggregate; 6393 6394 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6395 if (void_procs) { 6396 PetscInt prank = rank; 6397 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6398 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6399 for (i=0;i<xadj[1];i++) { 6400 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6401 } 6402 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6403 } else { 6404 oldranks = NULL; 6405 } 6406 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6407 if (aggregate) { /* TODO: all this part could be made more efficient */ 6408 PetscInt lrows,row,ncols,*cols; 6409 PetscMPIInt nrank; 6410 PetscScalar *vals; 6411 6412 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6413 lrows = 0; 6414 if (nrank<redprocs) { 6415 lrows = size/redprocs; 6416 if (nrank<size%redprocs) lrows++; 6417 } 6418 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6419 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6420 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6421 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6422 row = nrank; 6423 ncols = xadj[1]-xadj[0]; 6424 cols = adjncy; 6425 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6426 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6427 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6428 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6429 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6430 ierr = PetscFree(xadj);CHKERRQ(ierr); 6431 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6432 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6433 ierr = PetscFree(vals);CHKERRQ(ierr); 6434 if (use_vwgt) { 6435 Vec v; 6436 const PetscScalar *array; 6437 PetscInt nl; 6438 6439 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6440 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 6441 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6442 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6443 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6444 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6445 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6446 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6447 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6448 ierr = VecDestroy(&v);CHKERRQ(ierr); 6449 } 6450 } else { 6451 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6452 if (use_vwgt) { 6453 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6454 v_wgt[0] = local_size; 6455 } 6456 } 6457 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6458 6459 /* Partition */ 6460 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6461 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6462 if (v_wgt) { 6463 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6464 } 6465 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6466 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6467 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6468 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6469 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6470 6471 /* renumber new_ranks to avoid "holes" in new set of processors */ 6472 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6473 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6474 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6475 if (!aggregate) { 6476 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6477 #if defined(PETSC_USE_DEBUG) 6478 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6479 #endif 6480 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6481 } else if (oldranks) { 6482 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6483 } else { 6484 ranks_send_to_idx[0] = is_indices[0]; 6485 } 6486 } else { 6487 PetscInt idxs[1]; 6488 PetscMPIInt tag; 6489 MPI_Request *reqs; 6490 6491 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6492 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6493 for (i=rstart;i<rend;i++) { 6494 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6495 } 6496 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6497 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6498 ierr = PetscFree(reqs);CHKERRQ(ierr); 6499 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6500 #if defined(PETSC_USE_DEBUG) 6501 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6502 #endif 6503 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6504 } else if (oldranks) { 6505 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6506 } else { 6507 ranks_send_to_idx[0] = idxs[0]; 6508 } 6509 } 6510 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6511 /* clean up */ 6512 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6513 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6514 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6515 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6516 } 6517 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6518 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6519 6520 /* assemble parallel IS for sends */ 6521 i = 1; 6522 if (!color) i=0; 6523 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6524 PetscFunctionReturn(0); 6525 } 6526 6527 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6528 6529 #undef __FUNCT__ 6530 #define __FUNCT__ "PCBDDCMatISSubassemble" 6531 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6532 { 6533 Mat local_mat; 6534 IS is_sends_internal; 6535 PetscInt rows,cols,new_local_rows; 6536 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6537 PetscBool ismatis,isdense,newisdense,destroy_mat; 6538 ISLocalToGlobalMapping l2gmap; 6539 PetscInt* l2gmap_indices; 6540 const PetscInt* is_indices; 6541 MatType new_local_type; 6542 /* buffers */ 6543 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6544 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6545 PetscInt *recv_buffer_idxs_local; 6546 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6547 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6548 /* MPI */ 6549 MPI_Comm comm,comm_n; 6550 PetscSubcomm subcomm; 6551 PetscMPIInt n_sends,n_recvs,commsize; 6552 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6553 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6554 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6555 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6556 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6557 PetscErrorCode ierr; 6558 6559 PetscFunctionBegin; 6560 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6561 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6562 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6563 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6564 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6565 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6566 PetscValidLogicalCollectiveBool(mat,reuse,6); 6567 PetscValidLogicalCollectiveInt(mat,nis,8); 6568 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6569 if (nvecs) { 6570 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6571 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6572 } 6573 /* further checks */ 6574 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6575 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6576 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6577 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6578 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6579 if (reuse && *mat_n) { 6580 PetscInt mrows,mcols,mnrows,mncols; 6581 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6582 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6583 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6584 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6585 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6586 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6587 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6588 } 6589 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6590 PetscValidLogicalCollectiveInt(mat,bs,0); 6591 6592 /* prepare IS for sending if not provided */ 6593 if (!is_sends) { 6594 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6595 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6596 } else { 6597 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6598 is_sends_internal = is_sends; 6599 } 6600 6601 /* get comm */ 6602 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6603 6604 /* compute number of sends */ 6605 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6606 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6607 6608 /* compute number of receives */ 6609 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6610 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6611 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6612 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6613 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6614 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6615 ierr = PetscFree(iflags);CHKERRQ(ierr); 6616 6617 /* restrict comm if requested */ 6618 subcomm = 0; 6619 destroy_mat = PETSC_FALSE; 6620 if (restrict_comm) { 6621 PetscMPIInt color,subcommsize; 6622 6623 color = 0; 6624 if (restrict_full) { 6625 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6626 } else { 6627 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6628 } 6629 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6630 subcommsize = commsize - subcommsize; 6631 /* check if reuse has been requested */ 6632 if (reuse) { 6633 if (*mat_n) { 6634 PetscMPIInt subcommsize2; 6635 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6636 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6637 comm_n = PetscObjectComm((PetscObject)*mat_n); 6638 } else { 6639 comm_n = PETSC_COMM_SELF; 6640 } 6641 } else { /* MAT_INITIAL_MATRIX */ 6642 PetscMPIInt rank; 6643 6644 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6645 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6646 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6647 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6648 comm_n = PetscSubcommChild(subcomm); 6649 } 6650 /* flag to destroy *mat_n if not significative */ 6651 if (color) destroy_mat = PETSC_TRUE; 6652 } else { 6653 comm_n = comm; 6654 } 6655 6656 /* prepare send/receive buffers */ 6657 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6658 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6659 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6660 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6661 if (nis) { 6662 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6663 } 6664 6665 /* Get data from local matrices */ 6666 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6667 /* TODO: See below some guidelines on how to prepare the local buffers */ 6668 /* 6669 send_buffer_vals should contain the raw values of the local matrix 6670 send_buffer_idxs should contain: 6671 - MatType_PRIVATE type 6672 - PetscInt size_of_l2gmap 6673 - PetscInt global_row_indices[size_of_l2gmap] 6674 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6675 */ 6676 else { 6677 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6678 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6679 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6680 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6681 send_buffer_idxs[1] = i; 6682 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6683 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6684 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6685 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6686 for (i=0;i<n_sends;i++) { 6687 ilengths_vals[is_indices[i]] = len*len; 6688 ilengths_idxs[is_indices[i]] = len+2; 6689 } 6690 } 6691 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6692 /* additional is (if any) */ 6693 if (nis) { 6694 PetscMPIInt psum; 6695 PetscInt j; 6696 for (j=0,psum=0;j<nis;j++) { 6697 PetscInt plen; 6698 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6699 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6700 psum += len+1; /* indices + lenght */ 6701 } 6702 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6703 for (j=0,psum=0;j<nis;j++) { 6704 PetscInt plen; 6705 const PetscInt *is_array_idxs; 6706 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6707 send_buffer_idxs_is[psum] = plen; 6708 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6709 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6710 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6711 psum += plen+1; /* indices + lenght */ 6712 } 6713 for (i=0;i<n_sends;i++) { 6714 ilengths_idxs_is[is_indices[i]] = psum; 6715 } 6716 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6717 } 6718 6719 buf_size_idxs = 0; 6720 buf_size_vals = 0; 6721 buf_size_idxs_is = 0; 6722 buf_size_vecs = 0; 6723 for (i=0;i<n_recvs;i++) { 6724 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6725 buf_size_vals += (PetscInt)olengths_vals[i]; 6726 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6727 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6728 } 6729 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6730 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6731 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6732 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6733 6734 /* get new tags for clean communications */ 6735 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6736 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6737 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6738 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6739 6740 /* allocate for requests */ 6741 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6742 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6743 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6744 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6745 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6746 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6747 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6748 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6749 6750 /* communications */ 6751 ptr_idxs = recv_buffer_idxs; 6752 ptr_vals = recv_buffer_vals; 6753 ptr_idxs_is = recv_buffer_idxs_is; 6754 ptr_vecs = recv_buffer_vecs; 6755 for (i=0;i<n_recvs;i++) { 6756 source_dest = onodes[i]; 6757 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6758 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6759 ptr_idxs += olengths_idxs[i]; 6760 ptr_vals += olengths_vals[i]; 6761 if (nis) { 6762 source_dest = onodes_is[i]; 6763 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6764 ptr_idxs_is += olengths_idxs_is[i]; 6765 } 6766 if (nvecs) { 6767 source_dest = onodes[i]; 6768 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6769 ptr_vecs += olengths_idxs[i]-2; 6770 } 6771 } 6772 for (i=0;i<n_sends;i++) { 6773 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6774 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6775 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6776 if (nis) { 6777 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6778 } 6779 if (nvecs) { 6780 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6781 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6782 } 6783 } 6784 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6785 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6786 6787 /* assemble new l2g map */ 6788 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6789 ptr_idxs = recv_buffer_idxs; 6790 new_local_rows = 0; 6791 for (i=0;i<n_recvs;i++) { 6792 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6793 ptr_idxs += olengths_idxs[i]; 6794 } 6795 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6796 ptr_idxs = recv_buffer_idxs; 6797 new_local_rows = 0; 6798 for (i=0;i<n_recvs;i++) { 6799 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6800 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6801 ptr_idxs += olengths_idxs[i]; 6802 } 6803 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 6804 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 6805 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 6806 6807 /* infer new local matrix type from received local matrices type */ 6808 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 6809 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 6810 if (n_recvs) { 6811 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 6812 ptr_idxs = recv_buffer_idxs; 6813 for (i=0;i<n_recvs;i++) { 6814 if ((PetscInt)new_local_type_private != *ptr_idxs) { 6815 new_local_type_private = MATAIJ_PRIVATE; 6816 break; 6817 } 6818 ptr_idxs += olengths_idxs[i]; 6819 } 6820 switch (new_local_type_private) { 6821 case MATDENSE_PRIVATE: 6822 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 6823 new_local_type = MATSEQAIJ; 6824 bs = 1; 6825 } else { /* if I receive only 1 dense matrix */ 6826 new_local_type = MATSEQDENSE; 6827 bs = 1; 6828 } 6829 break; 6830 case MATAIJ_PRIVATE: 6831 new_local_type = MATSEQAIJ; 6832 bs = 1; 6833 break; 6834 case MATBAIJ_PRIVATE: 6835 new_local_type = MATSEQBAIJ; 6836 break; 6837 case MATSBAIJ_PRIVATE: 6838 new_local_type = MATSEQSBAIJ; 6839 break; 6840 default: 6841 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 6842 break; 6843 } 6844 } else { /* by default, new_local_type is seqdense */ 6845 new_local_type = MATSEQDENSE; 6846 bs = 1; 6847 } 6848 6849 /* create MATIS object if needed */ 6850 if (!reuse) { 6851 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 6852 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6853 } else { 6854 /* it also destroys the local matrices */ 6855 if (*mat_n) { 6856 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 6857 } else { /* this is a fake object */ 6858 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6859 } 6860 } 6861 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 6862 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 6863 6864 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6865 6866 /* Global to local map of received indices */ 6867 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 6868 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 6869 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 6870 6871 /* restore attributes -> type of incoming data and its size */ 6872 buf_size_idxs = 0; 6873 for (i=0;i<n_recvs;i++) { 6874 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 6875 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 6876 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6877 } 6878 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 6879 6880 /* set preallocation */ 6881 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 6882 if (!newisdense) { 6883 PetscInt *new_local_nnz=0; 6884 6885 ptr_idxs = recv_buffer_idxs_local; 6886 if (n_recvs) { 6887 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 6888 } 6889 for (i=0;i<n_recvs;i++) { 6890 PetscInt j; 6891 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 6892 for (j=0;j<*(ptr_idxs+1);j++) { 6893 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 6894 } 6895 } else { 6896 /* TODO */ 6897 } 6898 ptr_idxs += olengths_idxs[i]; 6899 } 6900 if (new_local_nnz) { 6901 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 6902 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 6903 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 6904 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6905 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 6906 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6907 } else { 6908 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6909 } 6910 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 6911 } else { 6912 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6913 } 6914 6915 /* set values */ 6916 ptr_vals = recv_buffer_vals; 6917 ptr_idxs = recv_buffer_idxs_local; 6918 for (i=0;i<n_recvs;i++) { 6919 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 6920 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 6921 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 6922 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6923 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6924 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 6925 } else { 6926 /* TODO */ 6927 } 6928 ptr_idxs += olengths_idxs[i]; 6929 ptr_vals += olengths_vals[i]; 6930 } 6931 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6932 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6933 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6934 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6935 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 6936 6937 #if 0 6938 if (!restrict_comm) { /* check */ 6939 Vec lvec,rvec; 6940 PetscReal infty_error; 6941 6942 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 6943 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 6944 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 6945 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 6946 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 6947 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6948 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 6949 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 6950 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 6951 } 6952 #endif 6953 6954 /* assemble new additional is (if any) */ 6955 if (nis) { 6956 PetscInt **temp_idxs,*count_is,j,psum; 6957 6958 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6959 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 6960 ptr_idxs = recv_buffer_idxs_is; 6961 psum = 0; 6962 for (i=0;i<n_recvs;i++) { 6963 for (j=0;j<nis;j++) { 6964 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6965 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 6966 psum += plen; 6967 ptr_idxs += plen+1; /* shift pointer to received data */ 6968 } 6969 } 6970 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 6971 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 6972 for (i=1;i<nis;i++) { 6973 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 6974 } 6975 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 6976 ptr_idxs = recv_buffer_idxs_is; 6977 for (i=0;i<n_recvs;i++) { 6978 for (j=0;j<nis;j++) { 6979 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6980 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 6981 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 6982 ptr_idxs += plen+1; /* shift pointer to received data */ 6983 } 6984 } 6985 for (i=0;i<nis;i++) { 6986 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6987 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 6988 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6989 } 6990 ierr = PetscFree(count_is);CHKERRQ(ierr); 6991 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 6992 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 6993 } 6994 /* free workspace */ 6995 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 6996 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6997 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 6998 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6999 if (isdense) { 7000 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7001 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7002 } else { 7003 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7004 } 7005 if (nis) { 7006 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7007 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7008 } 7009 7010 if (nvecs) { 7011 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7012 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7013 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7014 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7015 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7016 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7017 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7018 /* set values */ 7019 ptr_vals = recv_buffer_vecs; 7020 ptr_idxs = recv_buffer_idxs_local; 7021 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7022 for (i=0;i<n_recvs;i++) { 7023 PetscInt j; 7024 for (j=0;j<*(ptr_idxs+1);j++) { 7025 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7026 } 7027 ptr_idxs += olengths_idxs[i]; 7028 ptr_vals += olengths_idxs[i]-2; 7029 } 7030 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7031 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7032 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7033 } 7034 7035 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7036 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7037 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7038 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7039 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7040 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7041 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7042 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7043 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7044 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7045 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7046 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7047 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7048 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7049 ierr = PetscFree(onodes);CHKERRQ(ierr); 7050 if (nis) { 7051 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7052 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7053 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7054 } 7055 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7056 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7057 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7058 for (i=0;i<nis;i++) { 7059 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7060 } 7061 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7062 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7063 } 7064 *mat_n = NULL; 7065 } 7066 PetscFunctionReturn(0); 7067 } 7068 7069 /* temporary hack into ksp private data structure */ 7070 #include <petsc/private/kspimpl.h> 7071 7072 #undef __FUNCT__ 7073 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 7074 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7075 { 7076 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7077 PC_IS *pcis = (PC_IS*)pc->data; 7078 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7079 Mat coarsedivudotp = NULL; 7080 Mat coarseG,t_coarse_mat_is; 7081 MatNullSpace CoarseNullSpace = NULL; 7082 ISLocalToGlobalMapping coarse_islg; 7083 IS coarse_is,*isarray; 7084 PetscInt i,im_active=-1,active_procs=-1; 7085 PetscInt nis,nisdofs,nisneu,nisvert; 7086 PC pc_temp; 7087 PCType coarse_pc_type; 7088 KSPType coarse_ksp_type; 7089 PetscBool multilevel_requested,multilevel_allowed; 7090 PetscBool isredundant,isbddc,isnn,coarse_reuse; 7091 PetscInt ncoarse,nedcfield; 7092 PetscBool compute_vecs = PETSC_FALSE; 7093 PetscScalar *array; 7094 MatReuse coarse_mat_reuse; 7095 PetscBool restr, full_restr, have_void; 7096 PetscErrorCode ierr; 7097 7098 PetscFunctionBegin; 7099 /* Assign global numbering to coarse dofs */ 7100 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7101 PetscInt ocoarse_size; 7102 compute_vecs = PETSC_TRUE; 7103 ocoarse_size = pcbddc->coarse_size; 7104 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7105 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7106 /* see if we can avoid some work */ 7107 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7108 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7109 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7110 PC pc; 7111 PetscBool isbddc; 7112 7113 /* temporary workaround since PCBDDC does not have a reset method so far */ 7114 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 7115 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 7116 if (isbddc) { 7117 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 7118 } else { 7119 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7120 } 7121 coarse_reuse = PETSC_FALSE; 7122 } else { /* we can safely reuse already computed coarse matrix */ 7123 coarse_reuse = PETSC_TRUE; 7124 } 7125 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7126 coarse_reuse = PETSC_FALSE; 7127 } 7128 /* reset any subassembling information */ 7129 if (!coarse_reuse || pcbddc->recompute_topography) { 7130 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7131 } 7132 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7133 coarse_reuse = PETSC_TRUE; 7134 } 7135 /* assemble coarse matrix */ 7136 if (coarse_reuse && pcbddc->coarse_ksp) { 7137 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7138 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7139 coarse_mat_reuse = MAT_REUSE_MATRIX; 7140 } else { 7141 coarse_mat = NULL; 7142 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7143 } 7144 7145 /* creates temporary l2gmap and IS for coarse indexes */ 7146 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7147 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7148 7149 /* creates temporary MATIS object for coarse matrix */ 7150 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7151 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7152 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7153 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7154 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7155 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7156 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7157 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7158 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7159 7160 /* count "active" (i.e. with positive local size) and "void" processes */ 7161 im_active = !!(pcis->n); 7162 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7163 7164 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7165 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7166 /* full_restr : just use the receivers from the subassembling pattern */ 7167 coarse_mat_is = NULL; 7168 multilevel_allowed = PETSC_FALSE; 7169 multilevel_requested = PETSC_FALSE; 7170 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7171 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7172 if (multilevel_requested) { 7173 ncoarse = active_procs/pcbddc->coarsening_ratio; 7174 restr = PETSC_FALSE; 7175 full_restr = PETSC_FALSE; 7176 } else { 7177 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7178 restr = PETSC_TRUE; 7179 full_restr = PETSC_TRUE; 7180 } 7181 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7182 ncoarse = PetscMax(1,ncoarse); 7183 if (!pcbddc->coarse_subassembling) { 7184 if (pcbddc->coarsening_ratio > 1) { 7185 ierr = MatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7186 } else { 7187 PetscMPIInt size,rank; 7188 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 7189 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7190 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 7191 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7192 } 7193 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7194 PetscInt psum; 7195 PetscMPIInt size; 7196 if (pcbddc->coarse_ksp) psum = 1; 7197 else psum = 0; 7198 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7199 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 7200 if (ncoarse < size) have_void = PETSC_TRUE; 7201 } 7202 /* determine if we can go multilevel */ 7203 if (multilevel_requested) { 7204 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7205 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7206 } 7207 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7208 7209 /* dump subassembling pattern */ 7210 if (pcbddc->dbg_flag && multilevel_allowed) { 7211 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7212 } 7213 7214 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7215 nedcfield = -1; 7216 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7217 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7218 const PetscInt *idxs; 7219 ISLocalToGlobalMapping tmap; 7220 7221 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7222 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7223 /* allocate space for temporary storage */ 7224 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7225 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7226 /* allocate for IS array */ 7227 nisdofs = pcbddc->n_ISForDofsLocal; 7228 if (pcbddc->nedclocal) { 7229 if (pcbddc->nedfield > -1) { 7230 nedcfield = pcbddc->nedfield; 7231 } else { 7232 nedcfield = 0; 7233 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7234 nisdofs = 1; 7235 } 7236 } 7237 nisneu = !!pcbddc->NeumannBoundariesLocal; 7238 nisvert = 0; /* nisvert is not used */ 7239 nis = nisdofs + nisneu + nisvert; 7240 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7241 /* dofs splitting */ 7242 for (i=0;i<nisdofs;i++) { 7243 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7244 if (nedcfield != i) { 7245 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7246 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7247 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7248 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7249 } else { 7250 ierr = ISView(pcbddc->nedclocal,NULL);CHKERRQ(ierr); 7251 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7252 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7253 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7254 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7255 } 7256 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7257 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7258 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7259 } 7260 /* neumann boundaries */ 7261 if (pcbddc->NeumannBoundariesLocal) { 7262 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7263 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7264 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7265 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7266 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7267 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7268 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7269 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7270 } 7271 /* free memory */ 7272 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7273 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7274 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7275 } else { 7276 nis = 0; 7277 nisdofs = 0; 7278 nisneu = 0; 7279 nisvert = 0; 7280 isarray = NULL; 7281 } 7282 /* destroy no longer needed map */ 7283 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7284 7285 /* subassemble */ 7286 if (multilevel_allowed) { 7287 Vec vp[1]; 7288 PetscInt nvecs = 0; 7289 PetscBool reuse,reuser; 7290 7291 if (coarse_mat) reuse = PETSC_TRUE; 7292 else reuse = PETSC_FALSE; 7293 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7294 vp[0] = NULL; 7295 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7296 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7297 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7298 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7299 nvecs = 1; 7300 7301 if (pcbddc->divudotp) { 7302 Mat B,loc_divudotp; 7303 Vec v,p; 7304 IS dummy; 7305 PetscInt np; 7306 7307 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7308 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7309 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7310 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7311 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7312 ierr = VecSet(p,1.);CHKERRQ(ierr); 7313 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7314 ierr = VecDestroy(&p);CHKERRQ(ierr); 7315 ierr = MatDestroy(&B);CHKERRQ(ierr); 7316 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7317 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7318 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7319 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7320 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7321 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7322 ierr = VecDestroy(&v);CHKERRQ(ierr); 7323 } 7324 } 7325 if (reuser) { 7326 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7327 } else { 7328 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7329 } 7330 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7331 PetscScalar *arraym,*arrayv; 7332 PetscInt nl; 7333 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7334 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7335 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7336 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7337 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7338 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7339 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7340 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7341 } else { 7342 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7343 } 7344 } else { 7345 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7346 } 7347 if (coarse_mat_is || coarse_mat) { 7348 PetscMPIInt size; 7349 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7350 if (!multilevel_allowed) { 7351 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7352 } else { 7353 Mat A; 7354 7355 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7356 if (coarse_mat_is) { 7357 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7358 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7359 coarse_mat = coarse_mat_is; 7360 } 7361 /* be sure we don't have MatSeqDENSE as local mat */ 7362 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7363 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7364 } 7365 } 7366 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7367 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7368 7369 /* create local to global scatters for coarse problem */ 7370 if (compute_vecs) { 7371 PetscInt lrows; 7372 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7373 if (coarse_mat) { 7374 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7375 } else { 7376 lrows = 0; 7377 } 7378 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7379 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7380 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7381 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7382 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7383 } 7384 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7385 7386 /* set defaults for coarse KSP and PC */ 7387 if (multilevel_allowed) { 7388 coarse_ksp_type = KSPRICHARDSON; 7389 coarse_pc_type = PCBDDC; 7390 } else { 7391 coarse_ksp_type = KSPPREONLY; 7392 coarse_pc_type = PCREDUNDANT; 7393 } 7394 7395 /* print some info if requested */ 7396 if (pcbddc->dbg_flag) { 7397 if (!multilevel_allowed) { 7398 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7399 if (multilevel_requested) { 7400 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7401 } else if (pcbddc->max_levels) { 7402 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7403 } 7404 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7405 } 7406 } 7407 7408 /* communicate coarse discrete gradient */ 7409 coarseG = NULL; 7410 if (pcbddc->nedcG && multilevel_allowed) { 7411 MPI_Comm ccomm; 7412 if (coarse_mat) { 7413 ccomm = PetscObjectComm((PetscObject)coarse_mat); 7414 } else { 7415 ccomm = MPI_COMM_NULL; 7416 } 7417 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 7418 } 7419 7420 /* create the coarse KSP object only once with defaults */ 7421 if (coarse_mat) { 7422 PetscViewer dbg_viewer = NULL; 7423 if (pcbddc->dbg_flag) { 7424 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7425 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7426 } 7427 if (!pcbddc->coarse_ksp) { 7428 char prefix[256],str_level[16]; 7429 size_t len; 7430 7431 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7432 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7433 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7434 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7435 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7436 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7437 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7438 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7439 /* TODO is this logic correct? should check for coarse_mat type */ 7440 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7441 /* prefix */ 7442 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7443 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7444 if (!pcbddc->current_level) { 7445 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7446 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7447 } else { 7448 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7449 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7450 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7451 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7452 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7453 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7454 } 7455 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7456 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7457 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7458 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7459 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7460 /* allow user customization */ 7461 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7462 } 7463 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7464 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7465 if (nisdofs) { 7466 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7467 for (i=0;i<nisdofs;i++) { 7468 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7469 } 7470 } 7471 if (nisneu) { 7472 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7473 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7474 } 7475 if (nisvert) { 7476 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7477 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7478 } 7479 if (coarseG) { 7480 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 7481 } 7482 7483 /* get some info after set from options */ 7484 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7485 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7486 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7487 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 7488 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7489 isbddc = PETSC_FALSE; 7490 } 7491 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7492 if (isredundant) { 7493 KSP inner_ksp; 7494 PC inner_pc; 7495 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7496 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7497 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7498 } 7499 7500 /* parameters which miss an API */ 7501 if (isbddc) { 7502 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7503 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7504 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7505 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7506 if (pcbddc_coarse->benign_saddle_point) { 7507 Mat coarsedivudotp_is; 7508 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7509 IS row,col; 7510 const PetscInt *gidxs; 7511 PetscInt n,st,M,N; 7512 7513 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7514 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7515 st = st-n; 7516 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7517 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7518 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7519 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7520 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7521 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7522 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7523 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7524 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7525 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7526 ierr = ISDestroy(&row);CHKERRQ(ierr); 7527 ierr = ISDestroy(&col);CHKERRQ(ierr); 7528 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7529 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7530 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7531 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7532 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7533 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7534 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7535 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7536 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7537 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7538 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7539 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7540 } 7541 } 7542 7543 /* propagate symmetry info of coarse matrix */ 7544 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7545 if (pc->pmat->symmetric_set) { 7546 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7547 } 7548 if (pc->pmat->hermitian_set) { 7549 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7550 } 7551 if (pc->pmat->spd_set) { 7552 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7553 } 7554 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7555 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7556 } 7557 /* set operators */ 7558 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7559 if (pcbddc->dbg_flag) { 7560 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7561 } 7562 } 7563 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 7564 ierr = PetscFree(isarray);CHKERRQ(ierr); 7565 #if 0 7566 { 7567 PetscViewer viewer; 7568 char filename[256]; 7569 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7570 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7571 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7572 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7573 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7574 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7575 } 7576 #endif 7577 7578 if (pcbddc->coarse_ksp) { 7579 Vec crhs,csol; 7580 7581 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7582 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7583 if (!csol) { 7584 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7585 } 7586 if (!crhs) { 7587 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7588 } 7589 } 7590 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7591 7592 /* compute null space for coarse solver if the benign trick has been requested */ 7593 if (pcbddc->benign_null) { 7594 7595 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7596 for (i=0;i<pcbddc->benign_n;i++) { 7597 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7598 } 7599 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7600 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7601 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7602 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7603 if (coarse_mat) { 7604 Vec nullv; 7605 PetscScalar *array,*array2; 7606 PetscInt nl; 7607 7608 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7609 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7610 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7611 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7612 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7613 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7614 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7615 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7616 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7617 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7618 } 7619 } 7620 7621 if (pcbddc->coarse_ksp) { 7622 PetscBool ispreonly; 7623 7624 if (CoarseNullSpace) { 7625 PetscBool isnull; 7626 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7627 if (isnull) { 7628 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7629 } 7630 /* TODO: add local nullspaces (if any) */ 7631 } 7632 /* setup coarse ksp */ 7633 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7634 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7635 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7636 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7637 KSP check_ksp; 7638 KSPType check_ksp_type; 7639 PC check_pc; 7640 Vec check_vec,coarse_vec; 7641 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7642 PetscInt its; 7643 PetscBool compute_eigs; 7644 PetscReal *eigs_r,*eigs_c; 7645 PetscInt neigs; 7646 const char *prefix; 7647 7648 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7649 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7650 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7651 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7652 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7653 /* prevent from setup unneeded object */ 7654 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7655 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7656 if (ispreonly) { 7657 check_ksp_type = KSPPREONLY; 7658 compute_eigs = PETSC_FALSE; 7659 } else { 7660 check_ksp_type = KSPGMRES; 7661 compute_eigs = PETSC_TRUE; 7662 } 7663 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7664 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7665 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7666 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7667 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7668 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7669 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7670 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7671 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7672 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7673 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7674 /* create random vec */ 7675 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7676 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7677 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7678 /* solve coarse problem */ 7679 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7680 /* set eigenvalue estimation if preonly has not been requested */ 7681 if (compute_eigs) { 7682 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7683 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7684 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7685 if (neigs) { 7686 lambda_max = eigs_r[neigs-1]; 7687 lambda_min = eigs_r[0]; 7688 if (pcbddc->use_coarse_estimates) { 7689 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7690 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7691 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7692 } 7693 } 7694 } 7695 } 7696 7697 /* check coarse problem residual error */ 7698 if (pcbddc->dbg_flag) { 7699 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7700 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7701 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7702 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7703 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7704 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7705 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7706 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7707 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7708 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7709 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7710 if (CoarseNullSpace) { 7711 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7712 } 7713 if (compute_eigs) { 7714 PetscReal lambda_max_s,lambda_min_s; 7715 KSPConvergedReason reason; 7716 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7717 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7718 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 7719 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7720 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7721 for (i=0;i<neigs;i++) { 7722 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7723 } 7724 } 7725 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7726 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7727 } 7728 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7729 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7730 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7731 if (compute_eigs) { 7732 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7733 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7734 } 7735 } 7736 } 7737 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7738 /* print additional info */ 7739 if (pcbddc->dbg_flag) { 7740 /* waits until all processes reaches this point */ 7741 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7742 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7743 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7744 } 7745 7746 /* free memory */ 7747 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7748 PetscFunctionReturn(0); 7749 } 7750 7751 #undef __FUNCT__ 7752 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 7753 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7754 { 7755 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7756 PC_IS* pcis = (PC_IS*)pc->data; 7757 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7758 IS subset,subset_mult,subset_n; 7759 PetscInt local_size,coarse_size=0; 7760 PetscInt *local_primal_indices=NULL; 7761 const PetscInt *t_local_primal_indices; 7762 PetscErrorCode ierr; 7763 7764 PetscFunctionBegin; 7765 /* Compute global number of coarse dofs */ 7766 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7767 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7768 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7769 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7770 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7771 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7772 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7773 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7774 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7775 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7776 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7777 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7778 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7779 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7780 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7781 7782 /* check numbering */ 7783 if (pcbddc->dbg_flag) { 7784 PetscScalar coarsesum,*array,*array2; 7785 PetscInt i; 7786 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7787 7788 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7789 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7790 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7791 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7792 /* counter */ 7793 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7794 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7795 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7796 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7797 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7798 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7799 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7800 for (i=0;i<pcbddc->local_primal_size;i++) { 7801 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7802 } 7803 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7804 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7805 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7806 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7807 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7808 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7809 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7810 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7811 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7812 for (i=0;i<pcis->n;i++) { 7813 if (array[i] != 0.0 && array[i] != array2[i]) { 7814 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 7815 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 7816 set_error = PETSC_TRUE; 7817 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 7818 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 7819 } 7820 } 7821 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7822 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7823 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7824 for (i=0;i<pcis->n;i++) { 7825 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 7826 } 7827 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7828 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7829 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7830 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7831 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 7832 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 7833 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 7834 PetscInt *gidxs; 7835 7836 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 7837 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 7838 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 7839 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7840 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 7841 for (i=0;i<pcbddc->local_primal_size;i++) { 7842 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 7843 } 7844 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7845 ierr = PetscFree(gidxs);CHKERRQ(ierr); 7846 } 7847 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7848 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7849 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 7850 } 7851 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 7852 /* get back data */ 7853 *coarse_size_n = coarse_size; 7854 *local_primal_indices_n = local_primal_indices; 7855 PetscFunctionReturn(0); 7856 } 7857 7858 #undef __FUNCT__ 7859 #define __FUNCT__ "PCBDDCGlobalToLocal" 7860 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 7861 { 7862 IS localis_t; 7863 PetscInt i,lsize,*idxs,n; 7864 PetscScalar *vals; 7865 PetscErrorCode ierr; 7866 7867 PetscFunctionBegin; 7868 /* get indices in local ordering exploiting local to global map */ 7869 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 7870 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 7871 for (i=0;i<lsize;i++) vals[i] = 1.0; 7872 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7873 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 7874 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 7875 if (idxs) { /* multilevel guard */ 7876 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 7877 } 7878 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 7879 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7880 ierr = PetscFree(vals);CHKERRQ(ierr); 7881 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 7882 /* now compute set in local ordering */ 7883 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7884 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7885 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7886 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 7887 for (i=0,lsize=0;i<n;i++) { 7888 if (PetscRealPart(vals[i]) > 0.5) { 7889 lsize++; 7890 } 7891 } 7892 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 7893 for (i=0,lsize=0;i<n;i++) { 7894 if (PetscRealPart(vals[i]) > 0.5) { 7895 idxs[lsize++] = i; 7896 } 7897 } 7898 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7899 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 7900 *localis = localis_t; 7901 PetscFunctionReturn(0); 7902 } 7903 7904 #undef __FUNCT__ 7905 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 7906 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 7907 { 7908 PC_IS *pcis=(PC_IS*)pc->data; 7909 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7910 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 7911 Mat S_j; 7912 PetscInt *used_xadj,*used_adjncy; 7913 PetscBool free_used_adj; 7914 PetscErrorCode ierr; 7915 7916 PetscFunctionBegin; 7917 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 7918 free_used_adj = PETSC_FALSE; 7919 if (pcbddc->sub_schurs_layers == -1) { 7920 used_xadj = NULL; 7921 used_adjncy = NULL; 7922 } else { 7923 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 7924 used_xadj = pcbddc->mat_graph->xadj; 7925 used_adjncy = pcbddc->mat_graph->adjncy; 7926 } else if (pcbddc->computed_rowadj) { 7927 used_xadj = pcbddc->mat_graph->xadj; 7928 used_adjncy = pcbddc->mat_graph->adjncy; 7929 } else { 7930 PetscBool flg_row=PETSC_FALSE; 7931 const PetscInt *xadj,*adjncy; 7932 PetscInt nvtxs; 7933 7934 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7935 if (flg_row) { 7936 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 7937 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 7938 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 7939 free_used_adj = PETSC_TRUE; 7940 } else { 7941 pcbddc->sub_schurs_layers = -1; 7942 used_xadj = NULL; 7943 used_adjncy = NULL; 7944 } 7945 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7946 } 7947 } 7948 7949 /* setup sub_schurs data */ 7950 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7951 if (!sub_schurs->schur_explicit) { 7952 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 7953 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7954 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 7955 } else { 7956 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 7957 PetscBool isseqaij,need_change = PETSC_FALSE; 7958 PetscInt benign_n; 7959 Mat change = NULL; 7960 Vec scaling = NULL; 7961 IS change_primal = NULL; 7962 7963 if (!pcbddc->use_vertices && reuse_solvers) { 7964 PetscInt n_vertices; 7965 7966 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 7967 reuse_solvers = (PetscBool)!n_vertices; 7968 } 7969 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 7970 if (!isseqaij) { 7971 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7972 if (matis->A == pcbddc->local_mat) { 7973 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 7974 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7975 } else { 7976 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7977 } 7978 } 7979 if (!pcbddc->benign_change_explicit) { 7980 benign_n = pcbddc->benign_n; 7981 } else { 7982 benign_n = 0; 7983 } 7984 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 7985 We need a global reduction to avoid possible deadlocks. 7986 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 7987 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 7988 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 7989 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7990 need_change = (PetscBool)(!need_change); 7991 } 7992 /* If the user defines additional constraints, we import them here. 7993 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 7994 if (need_change) { 7995 PC_IS *pcisf; 7996 PC_BDDC *pcbddcf; 7997 PC pcf; 7998 7999 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8000 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8001 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8002 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8003 /* hacks */ 8004 pcisf = (PC_IS*)pcf->data; 8005 pcisf->is_B_local = pcis->is_B_local; 8006 pcisf->vec1_N = pcis->vec1_N; 8007 pcisf->BtoNmap = pcis->BtoNmap; 8008 pcisf->n = pcis->n; 8009 pcisf->n_B = pcis->n_B; 8010 pcbddcf = (PC_BDDC*)pcf->data; 8011 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8012 pcbddcf->mat_graph = pcbddc->mat_graph; 8013 pcbddcf->use_faces = PETSC_TRUE; 8014 pcbddcf->use_change_of_basis = PETSC_TRUE; 8015 pcbddcf->use_change_on_faces = PETSC_TRUE; 8016 pcbddcf->use_qr_single = PETSC_TRUE; 8017 pcbddcf->fake_change = PETSC_TRUE; 8018 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8019 /* store information on primal vertices and change of basis (in local numbering) */ 8020 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8021 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8022 change = pcbddcf->ConstraintMatrix; 8023 pcbddcf->ConstraintMatrix = NULL; 8024 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8025 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8026 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8027 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8028 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8029 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8030 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8031 pcf->ops->destroy = NULL; 8032 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8033 } 8034 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8035 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8036 ierr = MatDestroy(&change);CHKERRQ(ierr); 8037 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8038 } 8039 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8040 8041 /* free adjacency */ 8042 if (free_used_adj) { 8043 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8044 } 8045 PetscFunctionReturn(0); 8046 } 8047 8048 #undef __FUNCT__ 8049 #define __FUNCT__ "PCBDDCInitSubSchurs" 8050 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8051 { 8052 PC_IS *pcis=(PC_IS*)pc->data; 8053 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8054 PCBDDCGraph graph; 8055 PetscErrorCode ierr; 8056 8057 PetscFunctionBegin; 8058 /* attach interface graph for determining subsets */ 8059 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8060 IS verticesIS,verticescomm; 8061 PetscInt vsize,*idxs; 8062 8063 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8064 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8065 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8066 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8067 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8068 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8069 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8070 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8071 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8072 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8073 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8074 } else { 8075 graph = pcbddc->mat_graph; 8076 } 8077 /* print some info */ 8078 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8079 IS vertices; 8080 PetscInt nv,nedges,nfaces; 8081 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8082 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8083 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8084 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8085 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8086 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8087 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8088 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8089 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8090 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8091 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8092 } 8093 8094 /* sub_schurs init */ 8095 if (!pcbddc->sub_schurs) { 8096 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8097 } 8098 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8099 8100 /* free graph struct */ 8101 if (pcbddc->sub_schurs_rebuild) { 8102 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8103 } 8104 PetscFunctionReturn(0); 8105 } 8106 8107 #undef __FUNCT__ 8108 #define __FUNCT__ "PCBDDCCheckOperator" 8109 PetscErrorCode PCBDDCCheckOperator(PC pc) 8110 { 8111 PC_IS *pcis=(PC_IS*)pc->data; 8112 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8113 PetscErrorCode ierr; 8114 8115 PetscFunctionBegin; 8116 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8117 IS zerodiag = NULL; 8118 Mat S_j,B0_B=NULL; 8119 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8120 PetscScalar *p0_check,*array,*array2; 8121 PetscReal norm; 8122 PetscInt i; 8123 8124 /* B0 and B0_B */ 8125 if (zerodiag) { 8126 IS dummy; 8127 8128 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8129 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8130 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8131 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8132 } 8133 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8134 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8135 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8136 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8137 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8138 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8139 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8140 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8141 /* S_j */ 8142 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8143 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8144 8145 /* mimic vector in \widetilde{W}_\Gamma */ 8146 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8147 /* continuous in primal space */ 8148 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8149 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8150 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8151 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8152 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8153 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8154 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8155 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8156 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8157 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8158 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8159 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8160 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8161 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8162 8163 /* assemble rhs for coarse problem */ 8164 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8165 /* local with Schur */ 8166 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8167 if (zerodiag) { 8168 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8169 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8170 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8171 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8172 } 8173 /* sum on primal nodes the local contributions */ 8174 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8175 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8176 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8177 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8178 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8179 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8180 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8181 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8182 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8183 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8184 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8185 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8186 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8187 /* scale primal nodes (BDDC sums contibutions) */ 8188 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8189 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8190 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8191 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8192 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8193 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8194 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8195 /* global: \widetilde{B0}_B w_\Gamma */ 8196 if (zerodiag) { 8197 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8198 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8199 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8200 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8201 } 8202 /* BDDC */ 8203 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8204 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8205 8206 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8207 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8208 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8209 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8210 for (i=0;i<pcbddc->benign_n;i++) { 8211 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8212 } 8213 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8214 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8215 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8216 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8217 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8218 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8219 } 8220 PetscFunctionReturn(0); 8221 } 8222 8223 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8224 #undef __FUNCT__ 8225 #define __FUNCT__ "MatMPIAIJRestrict" 8226 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8227 { 8228 Mat At; 8229 IS rows; 8230 PetscInt rst,ren; 8231 PetscErrorCode ierr; 8232 PetscLayout rmap; 8233 8234 PetscFunctionBegin; 8235 rst = ren = 0; 8236 if (ccomm != MPI_COMM_NULL) { 8237 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8238 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8239 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8240 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8241 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8242 } 8243 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8244 ierr = MatGetSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8245 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8246 8247 if (ccomm != MPI_COMM_NULL) { 8248 Mat_MPIAIJ *a,*b; 8249 IS from,to; 8250 Vec gvec; 8251 PetscInt lsize; 8252 8253 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8254 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8255 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8256 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8257 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8258 a = (Mat_MPIAIJ*)At->data; 8259 b = (Mat_MPIAIJ*)(*B)->data; 8260 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8261 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8262 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8263 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8264 b->A = a->A; 8265 b->B = a->B; 8266 8267 b->donotstash = a->donotstash; 8268 b->roworiented = a->roworiented; 8269 b->rowindices = 0; 8270 b->rowvalues = 0; 8271 b->getrowactive = PETSC_FALSE; 8272 8273 (*B)->rmap = rmap; 8274 (*B)->factortype = A->factortype; 8275 (*B)->assembled = PETSC_TRUE; 8276 (*B)->insertmode = NOT_SET_VALUES; 8277 (*B)->preallocated = PETSC_TRUE; 8278 8279 if (a->colmap) { 8280 #if defined(PETSC_USE_CTABLE) 8281 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8282 #else 8283 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8284 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8285 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8286 #endif 8287 } else b->colmap = 0; 8288 if (a->garray) { 8289 PetscInt len; 8290 len = a->B->cmap->n; 8291 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8292 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8293 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8294 } else b->garray = 0; 8295 8296 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8297 b->lvec = a->lvec; 8298 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8299 8300 /* cannot use VecScatterCopy */ 8301 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8302 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8303 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8304 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8305 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8306 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8307 ierr = ISDestroy(&from);CHKERRQ(ierr); 8308 ierr = ISDestroy(&to);CHKERRQ(ierr); 8309 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8310 ierr = PetscObjectSetName((PetscObject)*B,"coarse_restrict_G");CHKERRQ(ierr); 8311 ierr = MatView(*B,NULL);CHKERRQ(ierr); 8312 } 8313 ierr = MatDestroy(&At);CHKERRQ(ierr); 8314 PetscFunctionReturn(0); 8315 } 8316