1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 8 9 /* returns B s.t. range(B) _|_ range(A) */ 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatDense_OrthogonalComplement" 12 PetscErrorCode MatDense_OrthogonalComplement(Mat A, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 13 { 14 #if !defined(PETSC_USE_COMPLEX) 15 PetscScalar *uwork,*data,*U, ds = 0.; 16 PetscReal *sing; 17 PetscBLASInt bM,bN,lwork,lierr,di = 1; 18 PetscInt ulw,i,nr,nc,n; 19 PetscErrorCode ierr; 20 21 PetscFunctionBegin; 22 #if defined(PETSC_MISSING_LAPACK_GESVD) 23 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 24 #endif 25 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 26 if (!nr || !nc) PetscFunctionReturn(0); 27 28 /* workspace */ 29 if (!work) { 30 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 31 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 32 } else { 33 ulw = lw; 34 uwork = work; 35 } 36 n = PetscMin(nr,nc); 37 if (!rwork) { 38 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 39 } else { 40 sing = rwork; 41 } 42 43 /* SVD */ 44 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 46 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 47 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 48 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 49 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 50 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 51 ierr = PetscFPTrapPop();CHKERRQ(ierr); 52 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 53 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 54 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 55 if (!rwork) { 56 ierr = PetscFree(sing);CHKERRQ(ierr); 57 } 58 if (!work) { 59 ierr = PetscFree(uwork);CHKERRQ(ierr); 60 } 61 /* create B */ 62 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 63 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 64 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 65 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 66 ierr = PetscFree(U);CHKERRQ(ierr); 67 #else 68 PetscFunctionBegin; 69 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 70 #endif 71 PetscFunctionReturn(0); 72 } 73 74 /* TODO REMOVE */ 75 #if defined(PRINT_GDET) 76 static int inc = 0; 77 static int lev = 0; 78 #endif 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "PCBDDCComputeNedelecChangeEdge" 82 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 83 { 84 PetscErrorCode ierr; 85 Mat GE,GEd; 86 PetscInt rsize,csize,esize; 87 PetscScalar *ptr; 88 89 PetscFunctionBegin; 90 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 91 if (!esize) PetscFunctionReturn(0); 92 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 93 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 94 95 /* gradients */ 96 ptr = work + 5*esize; 97 ierr = MatGetSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 98 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 99 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 100 ierr = MatDestroy(&GE);CHKERRQ(ierr); 101 102 /* constants */ 103 ptr += rsize*csize; 104 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 105 ierr = MatGetSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 107 ierr = MatDestroy(&GE);CHKERRQ(ierr); 108 ierr = MatDense_OrthogonalComplement(GEd,5*esize,work,rwork,GKins);CHKERRQ(ierr); 109 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 110 111 if (corners) { 112 Mat GEc; 113 PetscScalar *vals,v; 114 115 ierr = MatGetSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 116 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 117 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 118 /* TODO fix me */ 119 v = PetscAbsScalar(vals[0]); 120 v = 1.; 121 cvals[0] = vals[0]/v; 122 cvals[1] = vals[1]/v; 123 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 124 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 125 #if defined(PRINT_GDET) 126 { 127 PetscViewer viewer; 128 char filename[256]; 129 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 130 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 131 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 132 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 133 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 134 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 135 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 136 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 137 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 138 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 139 } 140 #endif 141 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 142 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 143 } 144 145 PetscFunctionReturn(0); 146 } 147 148 #undef __FUNCT__ 149 #define __FUNCT__ "PCBDDCNedelecSupport" 150 PetscErrorCode PCBDDCNedelecSupport(PC pc) 151 { 152 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 153 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 154 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 155 MatNullSpace nnsp; 156 Vec tvec,*quads; 157 PetscSF sfv; 158 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 159 MPI_Comm comm; 160 IS lned,primals,allprimals,nedfieldlocal; 161 IS *eedges,*extrows,*extcols,*alleedges; 162 PetscBT btv,bte,btvc,btb,btvcand,btvi,btee,bter; 163 PetscScalar *vals,*work; 164 PetscReal *rwork; 165 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 166 PetscInt ne,nv,Lv,order,n,field; 167 PetscInt n_neigh,*neigh,*n_shared,**shared; 168 PetscInt i,j,extmem,cum,maxsize,nee,nquads=2; 169 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 170 PetscInt *sfvleaves,*sfvroots; 171 PetscInt *corners,*cedges; 172 #if defined(PETSC_USE_DEBUG) 173 PetscInt *emarks; 174 #endif 175 PetscBool print,eerr,done,lrc[2],conforming,global; 176 PetscErrorCode ierr; 177 178 PetscFunctionBegin; 179 /* test variable order code and print debug info TODO: to be removed */ 180 print = PETSC_FALSE; 181 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_print_nedelec",&print,NULL);CHKERRQ(ierr); 182 ierr = PetscOptionsGetInt(NULL,NULL,"-pc_bddc_nedelec_order",&pcbddc->nedorder,NULL);CHKERRQ(ierr); 183 184 /* Return to caller if there are no edges in the decomposition */ 185 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 186 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 187 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 188 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 189 lrc[0] = PETSC_FALSE; 190 for (i=0;i<n;i++) { 191 if (PetscRealPart(vals[i]) > 2.) { 192 lrc[0] = PETSC_TRUE; 193 break; 194 } 195 } 196 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 197 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 198 if (!lrc[1]) PetscFunctionReturn(0); 199 200 /* If the discrete gradient is defined for a subset of dofs and global is true, 201 it assumes G is given in global ordering for all the dofs. 202 Otherwise, the ordering is global for the Nedelec field */ 203 order = pcbddc->nedorder; 204 conforming = pcbddc->conforming; 205 field = pcbddc->nedfield; 206 global = pcbddc->nedglobal; 207 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 208 if (pcbddc->n_ISForDofsLocal && field > -1) { 209 PetscBool setprimal = PETSC_FALSE; 210 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_nedelec_field_primal",&setprimal,NULL);CHKERRQ(ierr); 211 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 212 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 213 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 214 if (setprimal) { 215 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,nedfieldlocal);CHKERRQ(ierr); 216 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 217 PetscFunctionReturn(0); 218 } 219 } else if (!pcbddc->n_ISForDofsLocal) { 220 PetscBool testnedfield = PETSC_FALSE; 221 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_nedelec_field",&testnedfield,NULL);CHKERRQ(ierr); 222 if (!testnedfield) { 223 ne = n; 224 nedfieldlocal = NULL; 225 } else { 226 /* ierr = ISCreateStride(comm,n,0,1,&nedfieldlocal);CHKERRQ(ierr); */ 227 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 228 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 229 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 230 for (i=0;i<n;i++) matis->sf_leafdata[i] = 1; 231 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 232 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 233 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 234 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 235 for (i=0,cum=0;i<n;i++) { 236 if (matis->sf_leafdata[i] > 1) { 237 matis->sf_leafdata[cum++] = i; 238 } 239 } 240 ierr = ISCreateGeneral(comm,cum,matis->sf_leafdata,PETSC_COPY_VALUES,&nedfieldlocal);CHKERRQ(ierr); 241 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 242 } 243 global = PETSC_TRUE; 244 } else { 245 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 246 } 247 248 if (nedfieldlocal) { /* merge with previous code when testing is done */ 249 IS is; 250 251 /* need to map from the local Nedelec field to local numbering */ 252 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 253 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 254 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 255 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 256 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 257 if (global) { 258 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 259 el2g = al2g; 260 } else { 261 IS gis; 262 263 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 264 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 265 ierr = ISDestroy(&gis);CHKERRQ(ierr); 266 } 267 ierr = ISDestroy(&is);CHKERRQ(ierr); 268 } else { 269 /* restore default */ 270 pcbddc->nedfield = -1; 271 /* one ref for the destruction of al2g, one for el2g */ 272 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 273 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 274 el2g = al2g; 275 fl2g = NULL; 276 } 277 278 /* Sanity checks */ 279 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 280 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 281 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 282 283 /* Drop connections for interior edges */ 284 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 285 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 286 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 287 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 288 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 289 if (nedfieldlocal) { 290 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 291 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 292 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 293 } else { 294 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 295 } 296 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 297 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 298 if (global) { 299 PetscInt rst; 300 301 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 302 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 303 if (matis->sf_rootdata[i] < 2) { 304 matis->sf_rootdata[cum++] = i + rst; 305 } 306 } 307 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 308 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 309 } else { 310 PetscInt *tbz; 311 312 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 313 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 314 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 315 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 316 for (i=0,cum=0;i<ne;i++) 317 if (matis->sf_leafdata[idxs[i]] == 1) 318 tbz[cum++] = i; 319 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 320 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 321 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 322 ierr = PetscFree(tbz);CHKERRQ(ierr); 323 } 324 325 /* Extract subdomain relevant rows of G */ 326 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 327 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 328 ierr = MatGetSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 329 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 330 ierr = ISDestroy(&lned);CHKERRQ(ierr); 331 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 332 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 333 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 334 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 335 if (print) { 336 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 337 ierr = MatView(lG,NULL);CHKERRQ(ierr); 338 } 339 340 /* SF for nodal communications */ 341 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 342 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 343 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 344 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 345 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 346 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 347 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 348 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 349 ierr = PetscMalloc2(nv,&sfvleaves,Lv,&sfvroots);CHKERRQ(ierr); 350 351 /* Destroy temporary G created in MATIS format and modified G */ 352 ierr = MatDestroy(&G);CHKERRQ(ierr); 353 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 354 355 /* Save lG */ 356 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 357 358 /* Analyze the edge-nodes connections (duplicate lG) */ 359 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 360 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 361 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 362 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 363 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 364 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 365 /* need to import the boundary specification to ensure the 366 proper detection of coarse edges' endpoints */ 367 if (pcbddc->DirichletBoundariesLocal) { 368 IS is; 369 370 if (fl2g) { 371 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 372 } else { 373 is = pcbddc->DirichletBoundariesLocal; 374 } 375 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 376 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 377 for (i=0;i<cum;i++) { 378 if (idxs[i] >= 0) { 379 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 380 } 381 } 382 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 383 if (fl2g) { 384 ierr = ISDestroy(&is);CHKERRQ(ierr); 385 } 386 } 387 if (pcbddc->NeumannBoundariesLocal) { 388 IS is; 389 390 if (fl2g) { 391 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 392 } else { 393 is = pcbddc->NeumannBoundariesLocal; 394 } 395 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 396 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 397 for (i=0;i<cum;i++) { 398 if (idxs[i] >= 0) { 399 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 400 } 401 } 402 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 403 if (fl2g) { 404 ierr = ISDestroy(&is);CHKERRQ(ierr); 405 } 406 } 407 408 /* need to remove coarse faces' dofs to ensure the 409 proper detection of coarse edges' endpoints */ 410 ierr = PetscCalloc1(ne,&marks);CHKERRQ(ierr); 411 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 412 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 413 for (i=1;i<n_neigh;i++) 414 for (j=0;j<n_shared[i];j++) 415 marks[shared[i][j]]++; 416 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 417 for (i=0;i<ne;i++) { 418 if (marks[i] > 1 || (marks[i] == 1 && PetscBTLookup(btb,i))) { 419 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 420 } 421 } 422 423 if (!conforming) { 424 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 425 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 426 } 427 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 428 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 429 cum = 0; 430 for (i=0;i<ne;i++) { 431 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 432 if (!PetscBTLookup(btee,i)) { 433 marks[cum++] = i; 434 continue; 435 } 436 /* set badly connected edge dofs as primal */ 437 if (!conforming) { 438 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 439 marks[cum++] = i; 440 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 441 for (j=ii[i];j<ii[i+1];j++) { 442 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 443 } 444 } else { 445 /* every edge dofs should be connected trough a certain number of nodal dofs 446 to other edge dofs belonging to coarse edges 447 - at most 2 endpoints 448 - order-1 interior nodal dofs 449 - no undefined nodal dofs (nconn < order) 450 */ 451 PetscInt ends = 0,ints = 0, undef = 0; 452 for (j=ii[i];j<ii[i+1];j++) { 453 PetscInt v = jj[j],k; 454 PetscInt nconn = iit[v+1]-iit[v]; 455 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 456 if (nconn > order) ends++; 457 else if (nconn == order) ints++; 458 else undef++; 459 } 460 if (undef || ends > 2 || ints != order -1) { 461 marks[cum++] = i; 462 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 463 for (j=ii[i];j<ii[i+1];j++) { 464 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 465 } 466 } 467 } 468 } 469 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 470 if (!order && ii[i+1] != ii[i]) { 471 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 472 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 473 } 474 } 475 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 476 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 477 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 478 if (!conforming) { 479 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 480 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 481 } 482 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 483 /* identify splitpoints and corner candidates */ 484 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 485 if (print) { 486 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 487 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 488 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 489 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 490 } 491 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 492 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 493 for (i=0;i<nv;i++) { 494 PetscInt ord = order, test = ii[i+1]-ii[i]; 495 if (!order) { /* variable order */ 496 PetscReal vorder = 0.; 497 498 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 499 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 500 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 501 ord = 1; 502 } 503 #if defined(PETSC_USE_DEBUG) 504 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 505 #endif 506 if (test >= 3*ord) { /* splitpoints */ 507 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d\n",i); 508 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 509 } else if (test == ord) { 510 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 511 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 512 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 513 } else { 514 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 515 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 516 } 517 } 518 } 519 520 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 521 if (order != 1) { 522 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 523 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 524 for (i=0;i<nv;i++) { 525 if (PetscBTLookup(btvcand,i)) { 526 PetscBool found = PETSC_FALSE; 527 for (j=ii[i];j<ii[i+1] && !found;j++) { 528 PetscInt k,e = jj[j]; 529 if (PetscBTLookup(bte,e)) continue; 530 for (k=iit[e];k<iit[e+1];k++) { 531 PetscInt v = jjt[k]; 532 if (v != i && PetscBTLookup(btvcand,v)) { 533 found = PETSC_TRUE; 534 break; 535 } 536 } 537 } 538 if (!found) { 539 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 540 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 541 } else { 542 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 543 } 544 } 545 } 546 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 547 } 548 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 549 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 550 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 551 552 /* Get the local G^T explicitly */ 553 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 554 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 555 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 556 557 /* Mark interior nodal dofs */ 558 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 559 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 560 for (i=1;i<n_neigh;i++) { 561 for (j=0;j<n_shared[i];j++) { 562 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 563 } 564 } 565 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 566 567 /* communicate corners and splitpoints */ 568 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 569 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 570 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 571 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 572 573 if (print) { 574 IS tbz; 575 576 cum = 0; 577 for (i=0;i<nv;i++) 578 if (sfvleaves[i]) 579 vmarks[cum++] = i; 580 581 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 582 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 583 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 584 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 585 } 586 587 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 588 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 589 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 590 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 591 592 /* Zero rows of lGt corresponding to identified corners 593 and interior nodal dofs */ 594 cum = 0; 595 for (i=0;i<nv;i++) { 596 if (sfvleaves[i]) { 597 vmarks[cum++] = i; 598 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 599 } 600 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 601 } 602 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 603 if (print) { 604 IS tbz; 605 606 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 607 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 608 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 609 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 610 } 611 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 612 ierr = PetscFree(vmarks);CHKERRQ(ierr); 613 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 614 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 615 616 /* Recompute G */ 617 ierr = MatDestroy(&lG);CHKERRQ(ierr); 618 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 619 if (print) { 620 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 621 ierr = MatView(lG,NULL);CHKERRQ(ierr); 622 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 623 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 624 } 625 626 /* Get primal dofs (if any) */ 627 cum = 0; 628 for (i=0;i<ne;i++) { 629 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 630 } 631 if (fl2g) { 632 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 633 } 634 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 635 if (print) { 636 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 637 ierr = ISView(primals,NULL);CHKERRQ(ierr); 638 } 639 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 640 /* TODO: what if the user passed in some of them ? */ 641 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 642 ierr = ISDestroy(&primals);CHKERRQ(ierr); 643 644 /* Compute edge connectivity */ 645 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 646 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 647 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 648 if (fl2g) { 649 PetscBT btf; 650 PetscInt *iia,*jja,*iiu,*jju; 651 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 652 653 /* create CSR for all local dofs */ 654 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 655 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 656 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 657 iiu = pcbddc->mat_graph->xadj; 658 jju = pcbddc->mat_graph->adjncy; 659 } else if (pcbddc->use_local_adj) { 660 rest = PETSC_TRUE; 661 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 662 } else { 663 free = PETSC_TRUE; 664 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 665 iiu[0] = 0; 666 for (i=0;i<n;i++) { 667 iiu[i+1] = i+1; 668 jju[i] = -1; 669 } 670 } 671 672 /* import sizes of CSR */ 673 iia[0] = 0; 674 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 675 676 /* overwrite entries corresponding to the Nedelec field */ 677 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 678 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 679 for (i=0;i<ne;i++) { 680 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 681 iia[idxs[i]+1] = ii[i+1]-ii[i]; 682 } 683 684 /* iia in CSR */ 685 for (i=0;i<n;i++) iia[i+1] += iia[i]; 686 687 /* jja in CSR */ 688 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 689 for (i=0;i<n;i++) 690 if (!PetscBTLookup(btf,i)) 691 for (j=0;j<iiu[i+1]-iiu[i];j++) 692 jja[iia[i]+j] = jju[iiu[i]+j]; 693 694 /* map edge dofs connectivity */ 695 if (jj) { 696 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 697 for (i=0;i<ne;i++) { 698 PetscInt e = idxs[i]; 699 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 700 } 701 } 702 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 703 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 704 if (rest) { 705 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 706 } 707 if (free) { 708 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 709 } 710 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 711 } else { 712 if (jj) { 713 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 714 } 715 } 716 717 /* Analyze interface for edge dofs */ 718 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 719 720 /* Get coarse edges in the edge space */ 721 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 722 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 723 724 if (fl2g) { 725 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 726 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 727 for (i=0;i<nee;i++) { 728 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 729 } 730 } else { 731 eedges = alleedges; 732 primals = allprimals; 733 } 734 735 /* Mark fine edge dofs with their coarse edge id */ 736 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 737 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 738 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 739 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 740 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 741 if (print) { 742 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 743 ierr = ISView(primals,NULL);CHKERRQ(ierr); 744 } 745 746 maxsize = 0; 747 for (i=0;i<nee;i++) { 748 PetscInt size,mark = i+1; 749 750 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 751 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 752 for (j=0;j<size;j++) marks[idxs[j]] = mark; 753 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 754 maxsize = PetscMax(maxsize,size); 755 } 756 757 /* Find coarse edge endpoints */ 758 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 759 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 760 for (i=0;i<nee;i++) { 761 PetscInt mark = i+1,size; 762 763 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 764 if (!size && nedfieldlocal) continue; 765 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 766 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 767 if (print) { 768 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 769 ISView(eedges[i],NULL); 770 } 771 for (j=0;j<size;j++) { 772 PetscInt k, ee = idxs[j]; 773 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 774 for (k=ii[ee];k<ii[ee+1];k++) { 775 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 776 if (PetscBTLookup(btv,jj[k])) { 777 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 778 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 779 PetscInt k2; 780 PetscBool corner = PETSC_FALSE; 781 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 782 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 783 /* it's a corner if either is connected with an edge dof belonging to a different cc or 784 if the edge dof lie on the natural part of the boundary */ 785 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 786 corner = PETSC_TRUE; 787 break; 788 } 789 } 790 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 791 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 792 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 793 } else { 794 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 795 } 796 } 797 } 798 } 799 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 800 } 801 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 802 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 803 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 804 805 /* Reset marked primal dofs */ 806 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 807 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 808 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 809 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 810 811 /* Now use the initial lG */ 812 ierr = MatDestroy(&lG);CHKERRQ(ierr); 813 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 814 lG = lGinit; 815 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 816 817 /* Compute extended cols indices */ 818 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 819 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 820 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 821 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 822 i *= maxsize; 823 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 824 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 825 eerr = PETSC_FALSE; 826 for (i=0;i<nee;i++) { 827 PetscInt size,found = 0; 828 829 cum = 0; 830 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 831 if (!size && nedfieldlocal) continue; 832 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 833 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 834 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 835 for (j=0;j<size;j++) { 836 PetscInt k,ee = idxs[j]; 837 for (k=ii[ee];k<ii[ee+1];k++) { 838 PetscInt vv = jj[k]; 839 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 840 else if (!PetscBTLookupSet(btvc,vv)) found++; 841 } 842 } 843 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 844 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 845 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 846 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 847 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 848 /* it may happen that endpoints are not defined at this point 849 if it is the case, mark this edge for a second pass */ 850 if (cum != size -1 || found != 2) { 851 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 852 if (print) { 853 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 854 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 855 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 856 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 857 } 858 eerr = PETSC_TRUE; 859 } 860 } 861 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 862 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 863 if (done) { 864 PetscInt *newprimals; 865 866 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 867 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 868 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 869 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 870 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 871 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 872 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 873 for (i=0;i<nee;i++) { 874 PetscBool has_candidates = PETSC_FALSE; 875 if (PetscBTLookup(bter,i)) { 876 PetscInt size,mark = i+1; 877 878 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 879 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 880 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 881 for (j=0;j<size;j++) { 882 PetscInt k,ee = idxs[j]; 883 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 884 for (k=ii[ee];k<ii[ee+1];k++) { 885 /* set all candidates located on the edge as corners */ 886 if (PetscBTLookup(btvcand,jj[k])) { 887 PetscInt k2,vv = jj[k]; 888 has_candidates = PETSC_TRUE; 889 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 890 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 891 /* set all edge dofs connected to candidate as primals */ 892 for (k2=iit[vv];k2<iit[vv+1];k2++) { 893 if (marks[jjt[k2]] == mark) { 894 PetscInt k3,ee2 = jjt[k2]; 895 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 896 newprimals[cum++] = ee2; 897 /* finally set the new corners */ 898 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 899 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 900 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 901 } 902 } 903 } 904 } else { 905 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 906 } 907 } 908 } 909 if (!has_candidates) { /* circular edge */ 910 PetscInt k, ee = idxs[0],*tmarks; 911 912 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 913 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 914 for (k=ii[ee];k<ii[ee+1];k++) { 915 PetscInt k2; 916 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 917 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 918 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 919 } 920 for (j=0;j<size;j++) { 921 if (tmarks[idxs[j]] > 1) { 922 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 923 newprimals[cum++] = idxs[j]; 924 } 925 } 926 ierr = PetscFree(tmarks);CHKERRQ(ierr); 927 } 928 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 929 } 930 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 931 } 932 ierr = PetscFree(extcols);CHKERRQ(ierr); 933 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 934 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 935 if (fl2g) { 936 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 937 ierr = ISDestroy(&primals);CHKERRQ(ierr); 938 for (i=0;i<nee;i++) { 939 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 940 } 941 ierr = PetscFree(eedges);CHKERRQ(ierr); 942 } 943 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 944 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 945 ierr = PetscFree(newprimals);CHKERRQ(ierr); 946 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 947 ierr = ISDestroy(&primals);CHKERRQ(ierr); 948 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 949 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 950 if (fl2g) { 951 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 952 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 953 for (i=0;i<nee;i++) { 954 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 955 } 956 } else { 957 eedges = alleedges; 958 primals = allprimals; 959 } 960 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 961 962 /* Mark again */ 963 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 964 for (i=0;i<nee;i++) { 965 PetscInt size,mark = i+1; 966 967 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 968 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 969 for (j=0;j<size;j++) marks[idxs[j]] = mark; 970 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 971 } 972 if (print) { 973 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 974 ierr = ISView(primals,NULL);CHKERRQ(ierr); 975 } 976 977 /* Recompute extended cols */ 978 eerr = PETSC_FALSE; 979 for (i=0;i<nee;i++) { 980 PetscInt size; 981 982 cum = 0; 983 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 984 if (!size && nedfieldlocal) continue; 985 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 986 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 987 for (j=0;j<size;j++) { 988 PetscInt k,ee = idxs[j]; 989 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 990 } 991 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 992 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 993 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 994 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 995 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 996 if (cum != size -1) { 997 if (print) { 998 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 999 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1000 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1001 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1002 } 1003 eerr = PETSC_TRUE; 1004 } 1005 } 1006 } 1007 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1008 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1009 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1010 /* an error should not occur at this point */ 1011 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1012 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1013 1014 /* Check the number of endpoints */ 1015 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1016 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1017 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1018 for (i=0;i<nee;i++) { 1019 PetscInt size, found = 0, gc[2]; 1020 1021 /* init with defaults */ 1022 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1023 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1024 if (!size && nedfieldlocal) continue; 1025 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1026 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1027 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1028 for (j=0;j<size;j++) { 1029 PetscInt k,ee = idxs[j]; 1030 for (k=ii[ee];k<ii[ee+1];k++) { 1031 PetscInt vv = jj[k]; 1032 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1033 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1034 corners[i*2+found++] = vv; 1035 } 1036 } 1037 } 1038 if (found != 2) { 1039 PetscInt e; 1040 if (fl2g) { 1041 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1042 } else { 1043 e = idxs[0]; 1044 } 1045 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1046 } 1047 /* WARNING : this depends on how pcbddc->primal_indices_local_idxs is filled up in PCBDDConstraintsSetUp */ 1048 cedges[i] = idxs[0]; 1049 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1050 if (gc[0] > gc[1]) { 1051 PetscInt swap = corners[2*i]; 1052 corners[2*i] = corners[2*i+1]; 1053 corners[2*i+1] = swap; 1054 } 1055 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1056 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1057 } 1058 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1059 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1060 1061 #if defined(PETSC_USE_DEBUG) 1062 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1063 not interfere with neighbouring coarse edges */ 1064 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1065 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1066 for (i=0;i<nv;i++) { 1067 PetscInt emax = 0,eemax = 0; 1068 1069 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1070 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1071 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1072 for (j=1;j<nee+1;j++) { 1073 if (emax < emarks[j]) { 1074 emax = emarks[j]; 1075 eemax = j; 1076 } 1077 } 1078 /* not relevant for edges */ 1079 if (!eemax) continue; 1080 1081 for (j=ii[i];j<ii[i+1];j++) { 1082 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1083 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1084 } 1085 } 1086 } 1087 ierr = PetscFree(emarks);CHKERRQ(ierr); 1088 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1089 #endif 1090 1091 /* Compute extended rows indices for edge blocks of the change of basis */ 1092 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1093 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1094 extmem *= maxsize; 1095 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1096 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1097 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1098 for (i=0;i<nv;i++) { 1099 PetscInt mark = 0,size,start; 1100 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1101 for (j=ii[i];j<ii[i+1];j++) 1102 if (marks[jj[j]] && !mark) 1103 mark = marks[jj[j]]; 1104 1105 /* not relevant */ 1106 if (!mark) continue; 1107 1108 /* import extended row */ 1109 mark--; 1110 start = mark*extmem+extrowcum[mark]; 1111 size = ii[i+1]-ii[i]; 1112 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1113 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1114 extrowcum[mark] += size; 1115 } 1116 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1117 cum = 0; 1118 for (i=0;i<nee;i++) { 1119 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1120 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1121 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1122 cum = PetscMax(cum,size); 1123 } 1124 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1125 ierr = PetscFree(marks);CHKERRQ(ierr); 1126 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1127 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1128 1129 /* Workspace for lapack inner calls and VecSetValues */ 1130 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1131 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1132 for (i=0;i<maxsize;i++) vals[i] = 1.; 1133 1134 /* Create vectors for quadrature rules */ 1135 /* TODO preserve other quadratures */ 1136 ierr = PetscMalloc1(nquads,&quads);CHKERRQ(ierr); 1137 for (i=0;i<nquads;i++) { 1138 ierr = MatCreateVecs(pc->pmat,&quads[i],NULL);CHKERRQ(ierr); 1139 ierr = VecSetLocalToGlobalMapping(quads[i],al2g);CHKERRQ(ierr); 1140 } 1141 ierr = PCBDDCNullSpaceCreate(comm,PETSC_FALSE,nquads,quads,&nnsp);CHKERRQ(ierr); 1142 1143 /* Create change of basis matrix (preallocation can be improved) */ 1144 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1145 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1146 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1147 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1148 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1149 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1150 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1151 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1152 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1153 ierr = MatSetOption(T,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 1154 1155 /* Defaults to identity */ 1156 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1157 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1158 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1159 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1160 1161 /* Create discrete gradient for the coarser level if needed */ 1162 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1163 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1164 if (pcbddc->current_level < pcbddc->max_levels) { 1165 ISLocalToGlobalMapping cel2g,cvl2g; 1166 IS wis,gwis; 1167 PetscInt cnv,cne; 1168 1169 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1170 if (fl2g) { 1171 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1172 } else { 1173 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1174 pcbddc->nedclocal = wis; 1175 } 1176 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1177 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1178 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1179 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1180 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1181 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1182 1183 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1184 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1185 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1186 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1187 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1188 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1189 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1190 1191 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1192 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1193 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1194 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1195 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1196 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1197 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1198 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1199 } 1200 1201 #if defined(PRINT_GDET) 1202 inc = 0; 1203 lev = pcbddc->current_level; 1204 #endif 1205 for (i=0;i<nee;i++) { 1206 Mat Gins = NULL, GKins = NULL; 1207 IS cornersis = NULL; 1208 PetscScalar cvals[2]; 1209 1210 if (pcbddc->nedcG) { 1211 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1212 } 1213 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1214 if (Gins && GKins) { 1215 PetscScalar *data; 1216 const PetscInt *rows,*cols; 1217 PetscInt nrh,nch,nrc,ncc; 1218 1219 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1220 /* H1 */ 1221 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1222 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1223 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1224 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1225 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1226 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1227 /* complement */ 1228 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1229 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1230 if (ncc > nquads-1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet supported ncc %d nquads %d",ncc,nquads); 1231 if (ncc + nch != nrc) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d",ncc,nch,nrc); 1232 if (ncc != 1 && pcbddc->nedcG) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the dicrete gradient for the next level with ncc %d",ncc); 1233 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1234 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1235 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1236 /* Gins kernel quadratures */ 1237 for (j=0;j<ncc;j++) { 1238 ierr = VecSetValueLocal(quads[j],cols[nch+j],1.,INSERT_VALUES);CHKERRQ(ierr); 1239 } 1240 /* H1 average */ 1241 ierr = VecSetValuesLocal(quads[nquads-1],nch,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1242 1243 /* coarse discrete gradient */ 1244 if (pcbddc->nedcG) { 1245 PetscInt cols[2]; 1246 1247 cols[0] = 2*i; 1248 cols[1] = 2*i+1; 1249 if (print) PetscPrintf(PETSC_COMM_SELF,"INSERT at local row %d, cols (%d,%d), cvals (%g,%g)\n",i,cols[0],cols[1],cvals[0],cvals[1]); 1250 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1251 } 1252 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1253 } 1254 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1255 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1256 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1257 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1258 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1259 } 1260 1261 /* Start assembling */ 1262 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1263 for (i=0;i<nquads;i++) { 1264 ierr = VecAssemblyBegin(quads[i]);CHKERRQ(ierr); 1265 } 1266 if (pcbddc->nedcG) { 1267 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1268 } 1269 1270 /* Free */ 1271 if (fl2g) { 1272 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1273 for (i=0;i<nee;i++) { 1274 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1275 } 1276 ierr = PetscFree(eedges);CHKERRQ(ierr); 1277 } 1278 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1279 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1280 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1281 ierr = PetscFree(extrow);CHKERRQ(ierr); 1282 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1283 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1284 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1285 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1286 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1287 ierr = PetscFree(vals);CHKERRQ(ierr); 1288 ierr = PetscFree(corners);CHKERRQ(ierr); 1289 ierr = PetscFree(cedges);CHKERRQ(ierr); 1290 ierr = PetscFree(extrows);CHKERRQ(ierr); 1291 ierr = PetscFree(extcols);CHKERRQ(ierr); 1292 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1293 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1294 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1295 1296 /* Complete assembling */ 1297 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1298 for (i=0;i<nquads;i++) { 1299 ierr = VecAssemblyEnd(quads[i]);CHKERRQ(ierr); 1300 } 1301 for (i=0;i<nquads;i++) { 1302 ierr = VecDestroy(&quads[i]);CHKERRQ(ierr); 1303 } 1304 ierr = PetscFree(quads);CHKERRQ(ierr); 1305 if (pcbddc->nedcG) { 1306 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1307 #if 0 1308 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1309 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1310 #endif 1311 } 1312 1313 /* set change of basis */ 1314 ierr = PCBDDCSetChangeOfBasisMat(pc,T,PETSC_FALSE);CHKERRQ(ierr); 1315 #if 0 1316 if (pcbddc->current_level) { 1317 PetscViewer viewer; 1318 char filename[256]; 1319 Mat Tned; 1320 IS sub; 1321 PetscInt rst; 1322 1323 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 1324 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 1325 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 1326 if (nedfieldlocal) { 1327 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 1328 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 1329 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 1330 } else { 1331 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 1332 } 1333 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 1334 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 1335 ierr = MatGetOwnershipRange(pc->pmat,&rst,NULL);CHKERRQ(ierr); 1336 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 1337 if (matis->sf_rootdata[i]) { 1338 matis->sf_rootdata[cum++] = i + rst; 1339 } 1340 } 1341 PetscPrintf(PETSC_COMM_SELF,"[%D] LEVEL %d MY ne %d cum %d\n",PetscGlobalRank,pcbddc->current_level,ne,cum); 1342 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cum,matis->sf_rootdata,PETSC_USE_POINTER,&sub);CHKERRQ(ierr); 1343 ierr = MatGetSubMatrix(T,sub,sub,MAT_INITIAL_MATRIX,&Tned);CHKERRQ(ierr); 1344 ierr = ISDestroy(&sub);CHKERRQ(ierr); 1345 1346 sprintf(filename,"Change_l%d.m",pcbddc->current_level); 1347 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)Tned),filename,&viewer);CHKERRQ(ierr); 1348 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1349 ierr = PetscObjectSetName((PetscObject)Tned,"T");CHKERRQ(ierr); 1350 ierr = MatView(Tned,viewer);CHKERRQ(ierr); 1351 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1352 ierr = MatDestroy(&Tned);CHKERRQ(ierr); 1353 } 1354 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1355 #endif 1356 ierr = MatDestroy(&T);CHKERRQ(ierr); 1357 1358 /* set quadratures */ 1359 ierr = MatSetNearNullSpace(pc->pmat,nnsp);CHKERRQ(ierr); 1360 ierr = MatNullSpaceDestroy(&nnsp);CHKERRQ(ierr); 1361 1362 PetscFunctionReturn(0); 1363 } 1364 1365 /* the near-null space of BDDC carries information on quadrature weights, 1366 and these can be collinear -> so cheat with MatNullSpaceCreate 1367 and create a suitable set of basis vectors first */ 1368 #undef __FUNCT__ 1369 #define __FUNCT__ "PCBDDCNullSpaceCreate" 1370 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1371 { 1372 PetscErrorCode ierr; 1373 PetscInt i; 1374 1375 PetscFunctionBegin; 1376 for (i=0;i<nvecs;i++) { 1377 PetscInt first,last; 1378 1379 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1380 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1381 if (i>=first && i < last) { 1382 PetscScalar *data; 1383 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1384 if (!has_const) { 1385 data[i-first] = 1.; 1386 } else { 1387 data[2*i-first] = 1./PetscSqrtReal(2.); 1388 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1389 } 1390 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1391 } 1392 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1393 } 1394 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1395 for (i=0;i<nvecs;i++) { /* reset vectors */ 1396 PetscInt first,last; 1397 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1398 if (i>=first && i < last) { 1399 PetscScalar *data; 1400 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1401 if (!has_const) { 1402 data[i-first] = 0.; 1403 } else { 1404 data[2*i-first] = 0.; 1405 data[2*i-first+1] = 0.; 1406 } 1407 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1408 } 1409 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1410 } 1411 PetscFunctionReturn(0); 1412 } 1413 1414 #undef __FUNCT__ 1415 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 1416 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1417 { 1418 Mat loc_divudotp; 1419 Vec p,v,vins,quad_vec,*quad_vecs; 1420 ISLocalToGlobalMapping map; 1421 IS *faces,*edges; 1422 PetscScalar *vals; 1423 const PetscScalar *array; 1424 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1425 PetscMPIInt rank; 1426 PetscErrorCode ierr; 1427 1428 PetscFunctionBegin; 1429 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1430 if (graph->twodim) { 1431 lmaxneighs = 2; 1432 } else { 1433 lmaxneighs = 1; 1434 for (i=0;i<ne;i++) { 1435 const PetscInt *idxs; 1436 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1437 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1438 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1439 } 1440 lmaxneighs++; /* graph count does not include self */ 1441 } 1442 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1443 maxsize = 0; 1444 for (i=0;i<ne;i++) { 1445 PetscInt nn; 1446 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1447 maxsize = PetscMax(maxsize,nn); 1448 } 1449 for (i=0;i<nf;i++) { 1450 PetscInt nn; 1451 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1452 maxsize = PetscMax(maxsize,nn); 1453 } 1454 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1455 /* create vectors to hold quadrature weights */ 1456 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1457 if (!transpose) { 1458 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1459 } else { 1460 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1461 } 1462 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1463 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1464 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1465 for (i=0;i<maxneighs;i++) { 1466 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1467 } 1468 1469 /* compute local quad vec */ 1470 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1471 if (!transpose) { 1472 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1473 } else { 1474 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1475 } 1476 ierr = VecSet(p,1.);CHKERRQ(ierr); 1477 if (!transpose) { 1478 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1479 } else { 1480 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1481 } 1482 if (vl2l) { 1483 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1484 } else { 1485 vins = v; 1486 } 1487 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1488 ierr = VecDestroy(&p);CHKERRQ(ierr); 1489 1490 /* insert in global quadrature vecs */ 1491 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1492 for (i=0;i<nf;i++) { 1493 const PetscInt *idxs; 1494 PetscInt idx,nn,j; 1495 1496 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1497 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1498 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1499 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1500 idx = -(idx+1); 1501 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1502 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1503 } 1504 for (i=0;i<ne;i++) { 1505 const PetscInt *idxs; 1506 PetscInt idx,nn,j; 1507 1508 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1509 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1510 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1511 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1512 idx = -(idx+1); 1513 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1514 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1515 } 1516 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1517 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1518 if (vl2l) { 1519 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1520 } 1521 ierr = VecDestroy(&v);CHKERRQ(ierr); 1522 ierr = PetscFree(vals);CHKERRQ(ierr); 1523 1524 /* assemble near null space */ 1525 for (i=0;i<maxneighs;i++) { 1526 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1527 } 1528 for (i=0;i<maxneighs;i++) { 1529 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1530 } 1531 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1532 PetscFunctionReturn(0); 1533 } 1534 1535 1536 #undef __FUNCT__ 1537 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 1538 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1539 { 1540 PetscErrorCode ierr; 1541 Vec local,global; 1542 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1543 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1544 1545 PetscFunctionBegin; 1546 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1547 /* need to convert from global to local topology information and remove references to information in global ordering */ 1548 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1549 if (pcbddc->user_provided_isfordofs) { 1550 if (pcbddc->n_ISForDofs) { 1551 PetscInt i; 1552 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1553 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1554 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1555 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1556 } 1557 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1558 pcbddc->n_ISForDofs = 0; 1559 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1560 } 1561 } else { 1562 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1563 PetscInt i, n = matis->A->rmap->n; 1564 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1565 if (i > 1) { 1566 pcbddc->n_ISForDofsLocal = i; 1567 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1568 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1569 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1570 } 1571 } 1572 } 1573 } 1574 1575 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1576 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1577 } 1578 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1579 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1580 } 1581 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1582 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1583 } 1584 ierr = VecDestroy(&global);CHKERRQ(ierr); 1585 ierr = VecDestroy(&local);CHKERRQ(ierr); 1586 PetscFunctionReturn(0); 1587 } 1588 1589 #undef __FUNCT__ 1590 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 1591 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1592 { 1593 PC_IS *pcis = (PC_IS*)(pc->data); 1594 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1595 PetscErrorCode ierr; 1596 1597 PetscFunctionBegin; 1598 if (!pcbddc->benign_have_null) { 1599 PetscFunctionReturn(0); 1600 } 1601 if (pcbddc->ChangeOfBasisMatrix) { 1602 Vec swap; 1603 1604 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1605 swap = pcbddc->work_change; 1606 pcbddc->work_change = r; 1607 r = swap; 1608 } 1609 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1610 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1611 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1612 ierr = VecSet(z,0.);CHKERRQ(ierr); 1613 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1614 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1615 if (pcbddc->ChangeOfBasisMatrix) { 1616 pcbddc->work_change = r; 1617 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1618 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1619 } 1620 PetscFunctionReturn(0); 1621 } 1622 1623 #undef __FUNCT__ 1624 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 1625 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1626 { 1627 PCBDDCBenignMatMult_ctx ctx; 1628 PetscErrorCode ierr; 1629 PetscBool apply_right,apply_left,reset_x; 1630 1631 PetscFunctionBegin; 1632 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1633 if (transpose) { 1634 apply_right = ctx->apply_left; 1635 apply_left = ctx->apply_right; 1636 } else { 1637 apply_right = ctx->apply_right; 1638 apply_left = ctx->apply_left; 1639 } 1640 reset_x = PETSC_FALSE; 1641 if (apply_right) { 1642 const PetscScalar *ax; 1643 PetscInt nl,i; 1644 1645 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1646 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1647 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1648 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1649 for (i=0;i<ctx->benign_n;i++) { 1650 PetscScalar sum,val; 1651 const PetscInt *idxs; 1652 PetscInt nz,j; 1653 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1654 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1655 sum = 0.; 1656 if (ctx->apply_p0) { 1657 val = ctx->work[idxs[nz-1]]; 1658 for (j=0;j<nz-1;j++) { 1659 sum += ctx->work[idxs[j]]; 1660 ctx->work[idxs[j]] += val; 1661 } 1662 } else { 1663 for (j=0;j<nz-1;j++) { 1664 sum += ctx->work[idxs[j]]; 1665 } 1666 } 1667 ctx->work[idxs[nz-1]] -= sum; 1668 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1669 } 1670 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1671 reset_x = PETSC_TRUE; 1672 } 1673 if (transpose) { 1674 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1675 } else { 1676 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1677 } 1678 if (reset_x) { 1679 ierr = VecResetArray(x);CHKERRQ(ierr); 1680 } 1681 if (apply_left) { 1682 PetscScalar *ay; 1683 PetscInt i; 1684 1685 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1686 for (i=0;i<ctx->benign_n;i++) { 1687 PetscScalar sum,val; 1688 const PetscInt *idxs; 1689 PetscInt nz,j; 1690 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1691 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1692 val = -ay[idxs[nz-1]]; 1693 if (ctx->apply_p0) { 1694 sum = 0.; 1695 for (j=0;j<nz-1;j++) { 1696 sum += ay[idxs[j]]; 1697 ay[idxs[j]] += val; 1698 } 1699 ay[idxs[nz-1]] += sum; 1700 } else { 1701 for (j=0;j<nz-1;j++) { 1702 ay[idxs[j]] += val; 1703 } 1704 ay[idxs[nz-1]] = 0.; 1705 } 1706 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1707 } 1708 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1709 } 1710 PetscFunctionReturn(0); 1711 } 1712 1713 #undef __FUNCT__ 1714 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 1715 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1716 { 1717 PetscErrorCode ierr; 1718 1719 PetscFunctionBegin; 1720 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1721 PetscFunctionReturn(0); 1722 } 1723 1724 #undef __FUNCT__ 1725 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 1726 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1727 { 1728 PetscErrorCode ierr; 1729 1730 PetscFunctionBegin; 1731 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1732 PetscFunctionReturn(0); 1733 } 1734 1735 #undef __FUNCT__ 1736 #define __FUNCT__ "PCBDDCBenignShellMat" 1737 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1738 { 1739 PC_IS *pcis = (PC_IS*)pc->data; 1740 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1741 PCBDDCBenignMatMult_ctx ctx; 1742 PetscErrorCode ierr; 1743 1744 PetscFunctionBegin; 1745 if (!restore) { 1746 Mat A_IB,A_BI; 1747 PetscScalar *work; 1748 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1749 1750 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1751 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1752 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1753 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1754 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1755 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1756 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1757 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1758 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1759 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1760 ctx->apply_left = PETSC_TRUE; 1761 ctx->apply_right = PETSC_FALSE; 1762 ctx->apply_p0 = PETSC_FALSE; 1763 ctx->benign_n = pcbddc->benign_n; 1764 if (reuse) { 1765 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1766 ctx->free = PETSC_FALSE; 1767 } else { /* TODO: could be optimized for successive solves */ 1768 ISLocalToGlobalMapping N_to_D; 1769 PetscInt i; 1770 1771 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1772 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1773 for (i=0;i<pcbddc->benign_n;i++) { 1774 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1775 } 1776 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1777 ctx->free = PETSC_TRUE; 1778 } 1779 ctx->A = pcis->A_IB; 1780 ctx->work = work; 1781 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1782 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1783 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1784 pcis->A_IB = A_IB; 1785 1786 /* A_BI as A_IB^T */ 1787 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1788 pcbddc->benign_original_mat = pcis->A_BI; 1789 pcis->A_BI = A_BI; 1790 } else { 1791 if (!pcbddc->benign_original_mat) { 1792 PetscFunctionReturn(0); 1793 } 1794 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1795 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1796 pcis->A_IB = ctx->A; 1797 ctx->A = NULL; 1798 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1799 pcis->A_BI = pcbddc->benign_original_mat; 1800 pcbddc->benign_original_mat = NULL; 1801 if (ctx->free) { 1802 PetscInt i; 1803 for (i=0;i<ctx->benign_n;i++) { 1804 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1805 } 1806 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1807 } 1808 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1809 ierr = PetscFree(ctx);CHKERRQ(ierr); 1810 } 1811 PetscFunctionReturn(0); 1812 } 1813 1814 /* used just in bddc debug mode */ 1815 #undef __FUNCT__ 1816 #define __FUNCT__ "PCBDDCBenignProject" 1817 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1818 { 1819 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1820 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1821 Mat An; 1822 PetscErrorCode ierr; 1823 1824 PetscFunctionBegin; 1825 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1826 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1827 if (is1) { 1828 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1829 ierr = MatDestroy(&An);CHKERRQ(ierr); 1830 } else { 1831 *B = An; 1832 } 1833 PetscFunctionReturn(0); 1834 } 1835 1836 /* TODO: add reuse flag */ 1837 #undef __FUNCT__ 1838 #define __FUNCT__ "MatSeqAIJCompress" 1839 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1840 { 1841 Mat Bt; 1842 PetscScalar *a,*bdata; 1843 const PetscInt *ii,*ij; 1844 PetscInt m,n,i,nnz,*bii,*bij; 1845 PetscBool flg_row; 1846 PetscErrorCode ierr; 1847 1848 PetscFunctionBegin; 1849 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 1850 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1851 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 1852 nnz = n; 1853 for (i=0;i<ii[n];i++) { 1854 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 1855 } 1856 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 1857 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 1858 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 1859 nnz = 0; 1860 bii[0] = 0; 1861 for (i=0;i<n;i++) { 1862 PetscInt j; 1863 for (j=ii[i];j<ii[i+1];j++) { 1864 PetscScalar entry = a[j]; 1865 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 1866 bij[nnz] = ij[j]; 1867 bdata[nnz] = entry; 1868 nnz++; 1869 } 1870 } 1871 bii[i+1] = nnz; 1872 } 1873 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 1874 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 1875 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1876 { 1877 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 1878 b->free_a = PETSC_TRUE; 1879 b->free_ij = PETSC_TRUE; 1880 } 1881 *B = Bt; 1882 PetscFunctionReturn(0); 1883 } 1884 1885 #undef __FUNCT__ 1886 #define __FUNCT__ "MatDetectDisconnectedComponents" 1887 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 1888 { 1889 Mat B; 1890 IS is_dummy,*cc_n; 1891 ISLocalToGlobalMapping l2gmap_dummy; 1892 PCBDDCGraph graph; 1893 PetscInt i,n; 1894 PetscInt *xadj,*adjncy; 1895 PetscInt *xadj_filtered,*adjncy_filtered; 1896 PetscBool flg_row,isseqaij; 1897 PetscErrorCode ierr; 1898 1899 PetscFunctionBegin; 1900 if (!A->rmap->N || !A->cmap->N) { 1901 *ncc = 0; 1902 *cc = NULL; 1903 PetscFunctionReturn(0); 1904 } 1905 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1906 if (!isseqaij && filter) { 1907 PetscBool isseqdense; 1908 1909 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 1910 if (!isseqdense) { 1911 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1912 } else { /* TODO: rectangular case and LDA */ 1913 PetscScalar *array; 1914 PetscReal chop=1.e-6; 1915 1916 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1917 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 1918 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 1919 for (i=0;i<n;i++) { 1920 PetscInt j; 1921 for (j=i+1;j<n;j++) { 1922 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 1923 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 1924 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 1925 } 1926 } 1927 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 1928 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1929 } 1930 } else { 1931 B = A; 1932 } 1933 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1934 1935 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 1936 if (filter) { 1937 PetscScalar *data; 1938 PetscInt j,cum; 1939 1940 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 1941 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 1942 cum = 0; 1943 for (i=0;i<n;i++) { 1944 PetscInt t; 1945 1946 for (j=xadj[i];j<xadj[i+1];j++) { 1947 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 1948 continue; 1949 } 1950 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 1951 } 1952 t = xadj_filtered[i]; 1953 xadj_filtered[i] = cum; 1954 cum += t; 1955 } 1956 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 1957 } else { 1958 xadj_filtered = NULL; 1959 adjncy_filtered = NULL; 1960 } 1961 1962 /* compute local connected components using PCBDDCGraph */ 1963 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 1964 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 1965 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1966 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 1967 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 1968 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 1969 if (xadj_filtered) { 1970 graph->xadj = xadj_filtered; 1971 graph->adjncy = adjncy_filtered; 1972 } else { 1973 graph->xadj = xadj; 1974 graph->adjncy = adjncy; 1975 } 1976 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 1977 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 1978 /* partial clean up */ 1979 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 1980 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1981 if (A != B) { 1982 ierr = MatDestroy(&B);CHKERRQ(ierr); 1983 } 1984 1985 /* get back data */ 1986 if (ncc) *ncc = graph->ncc; 1987 if (cc) { 1988 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 1989 for (i=0;i<graph->ncc;i++) { 1990 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 1991 } 1992 *cc = cc_n; 1993 } 1994 /* clean up graph */ 1995 graph->xadj = 0; 1996 graph->adjncy = 0; 1997 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 1998 PetscFunctionReturn(0); 1999 } 2000 2001 #undef __FUNCT__ 2002 #define __FUNCT__ "PCBDDCBenignCheck" 2003 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2004 { 2005 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2006 PC_IS* pcis = (PC_IS*)(pc->data); 2007 IS dirIS = NULL; 2008 PetscInt i; 2009 PetscErrorCode ierr; 2010 2011 PetscFunctionBegin; 2012 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2013 if (zerodiag) { 2014 Mat A; 2015 Vec vec3_N; 2016 PetscScalar *vals; 2017 const PetscInt *idxs; 2018 PetscInt nz,*count; 2019 2020 /* p0 */ 2021 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2022 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2023 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2024 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2025 for (i=0;i<nz;i++) vals[i] = 1.; 2026 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2027 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2028 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2029 /* v_I */ 2030 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2031 for (i=0;i<nz;i++) vals[i] = 0.; 2032 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2033 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2034 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2035 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2036 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2037 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2038 if (dirIS) { 2039 PetscInt n; 2040 2041 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2042 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2043 for (i=0;i<n;i++) vals[i] = 0.; 2044 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2045 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2046 } 2047 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2048 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2049 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2050 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2051 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2052 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2053 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2054 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2055 ierr = PetscFree(vals);CHKERRQ(ierr); 2056 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2057 2058 /* there should not be any pressure dofs lying on the interface */ 2059 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2060 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2061 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2062 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2063 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2064 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2065 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2066 ierr = PetscFree(count);CHKERRQ(ierr); 2067 } 2068 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2069 2070 /* check PCBDDCBenignGetOrSetP0 */ 2071 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2072 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2073 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2074 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2075 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2076 for (i=0;i<pcbddc->benign_n;i++) { 2077 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2078 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 2079 } 2080 PetscFunctionReturn(0); 2081 } 2082 2083 #undef __FUNCT__ 2084 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 2085 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2086 { 2087 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2088 IS pressures,zerodiag,*zerodiag_subs; 2089 PetscInt nz,n; 2090 PetscInt *interior_dofs,n_interior_dofs; 2091 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 2092 PetscErrorCode ierr; 2093 2094 PetscFunctionBegin; 2095 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2096 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2097 for (n=0;n<pcbddc->benign_n;n++) { 2098 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2099 } 2100 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2101 pcbddc->benign_n = 0; 2102 /* if a local info on dofs is present, assumes that the last field represents "pressures" 2103 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2104 Checks if all the pressure dofs in each subdomain have a zero diagonal 2105 If not, a change of basis on pressures is not needed 2106 since the local Schur complements are already SPD 2107 */ 2108 has_null_pressures = PETSC_TRUE; 2109 have_null = PETSC_TRUE; 2110 if (pcbddc->n_ISForDofsLocal) { 2111 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2112 2113 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2114 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2115 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2116 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2117 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2118 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2119 if (!sorted) { 2120 ierr = ISSort(pressures);CHKERRQ(ierr); 2121 } 2122 } else { 2123 pressures = NULL; 2124 } 2125 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2126 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2127 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2128 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2129 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2130 if (!sorted) { 2131 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2132 } 2133 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2134 if (!nz) { 2135 if (n) have_null = PETSC_FALSE; 2136 has_null_pressures = PETSC_FALSE; 2137 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2138 } 2139 recompute_zerodiag = PETSC_FALSE; 2140 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2141 zerodiag_subs = NULL; 2142 pcbddc->benign_n = 0; 2143 n_interior_dofs = 0; 2144 interior_dofs = NULL; 2145 if (pcbddc->current_level) { /* need to compute interior nodes */ 2146 PetscInt n,i,j; 2147 PetscInt n_neigh,*neigh,*n_shared,**shared; 2148 PetscInt *iwork; 2149 2150 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2151 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2152 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2153 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2154 for (i=1;i<n_neigh;i++) 2155 for (j=0;j<n_shared[i];j++) 2156 iwork[shared[i][j]] += 1; 2157 for (i=0;i<n;i++) 2158 if (!iwork[i]) 2159 interior_dofs[n_interior_dofs++] = i; 2160 ierr = PetscFree(iwork);CHKERRQ(ierr); 2161 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2162 } 2163 if (has_null_pressures) { 2164 IS *subs; 2165 PetscInt nsubs,i,j,nl; 2166 const PetscInt *idxs; 2167 PetscScalar *array; 2168 Vec *work; 2169 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2170 2171 subs = pcbddc->local_subs; 2172 nsubs = pcbddc->n_local_subs; 2173 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2174 if (pcbddc->current_level) { 2175 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2176 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2177 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2178 /* work[0] = 1_p */ 2179 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2180 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2181 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2182 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2183 /* work[0] = 1_v */ 2184 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2185 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2186 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2187 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2188 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2189 } 2190 if (nsubs > 1) { 2191 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2192 for (i=0;i<nsubs;i++) { 2193 ISLocalToGlobalMapping l2g; 2194 IS t_zerodiag_subs; 2195 PetscInt nl; 2196 2197 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2198 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2199 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2200 if (nl) { 2201 PetscBool valid = PETSC_TRUE; 2202 2203 if (pcbddc->current_level) { 2204 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2205 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2206 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2207 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2208 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2209 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2210 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2211 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2212 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2213 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2214 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2215 for (j=0;j<n_interior_dofs;j++) { 2216 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2217 valid = PETSC_FALSE; 2218 break; 2219 } 2220 } 2221 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2222 } 2223 if (valid && pcbddc->NeumannBoundariesLocal) { 2224 IS t_bc; 2225 PetscInt nzb; 2226 2227 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 2228 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 2229 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 2230 if (nzb) valid = PETSC_FALSE; 2231 } 2232 if (valid && pressures) { 2233 IS t_pressure_subs; 2234 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2235 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2236 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2237 } 2238 if (valid) { 2239 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2240 pcbddc->benign_n++; 2241 } else { 2242 recompute_zerodiag = PETSC_TRUE; 2243 } 2244 } 2245 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2246 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2247 } 2248 } else { /* there's just one subdomain (or zero if they have not been detected */ 2249 PetscBool valid = PETSC_TRUE; 2250 2251 if (pcbddc->NeumannBoundariesLocal) { 2252 PetscInt nzb; 2253 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 2254 if (nzb) valid = PETSC_FALSE; 2255 } 2256 if (valid && pressures) { 2257 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2258 } 2259 if (valid && pcbddc->current_level) { 2260 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2261 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2262 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2263 for (j=0;j<n_interior_dofs;j++) { 2264 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2265 valid = PETSC_FALSE; 2266 break; 2267 } 2268 } 2269 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2270 } 2271 if (valid) { 2272 pcbddc->benign_n = 1; 2273 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2274 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2275 zerodiag_subs[0] = zerodiag; 2276 } 2277 } 2278 if (pcbddc->current_level) { 2279 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2280 } 2281 } 2282 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2283 2284 if (!pcbddc->benign_n) { 2285 PetscInt n; 2286 2287 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2288 recompute_zerodiag = PETSC_FALSE; 2289 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2290 if (n) { 2291 has_null_pressures = PETSC_FALSE; 2292 have_null = PETSC_FALSE; 2293 } 2294 } 2295 2296 /* final check for null pressures */ 2297 if (zerodiag && pressures) { 2298 PetscInt nz,np; 2299 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2300 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2301 if (nz != np) have_null = PETSC_FALSE; 2302 } 2303 2304 if (recompute_zerodiag) { 2305 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2306 if (pcbddc->benign_n == 1) { 2307 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2308 zerodiag = zerodiag_subs[0]; 2309 } else { 2310 PetscInt i,nzn,*new_idxs; 2311 2312 nzn = 0; 2313 for (i=0;i<pcbddc->benign_n;i++) { 2314 PetscInt ns; 2315 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2316 nzn += ns; 2317 } 2318 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2319 nzn = 0; 2320 for (i=0;i<pcbddc->benign_n;i++) { 2321 PetscInt ns,*idxs; 2322 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2323 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2324 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2325 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2326 nzn += ns; 2327 } 2328 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2329 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2330 } 2331 have_null = PETSC_FALSE; 2332 } 2333 2334 /* Prepare matrix to compute no-net-flux */ 2335 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2336 Mat A,loc_divudotp; 2337 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2338 IS row,col,isused = NULL; 2339 PetscInt M,N,n,st,n_isused; 2340 2341 if (pressures) { 2342 isused = pressures; 2343 } else { 2344 isused = zerodiag; 2345 } 2346 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2347 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2348 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2349 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2350 n_isused = 0; 2351 if (isused) { 2352 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2353 } 2354 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2355 st = st-n_isused; 2356 if (n) { 2357 const PetscInt *gidxs; 2358 2359 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2360 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2361 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2362 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2363 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2364 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2365 } else { 2366 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2367 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2368 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2369 } 2370 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2371 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2372 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2373 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2374 ierr = ISDestroy(&row);CHKERRQ(ierr); 2375 ierr = ISDestroy(&col);CHKERRQ(ierr); 2376 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2377 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2378 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2379 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2380 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2381 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2382 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2383 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2384 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2385 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2386 } 2387 2388 /* change of basis and p0 dofs */ 2389 if (has_null_pressures) { 2390 IS zerodiagc; 2391 const PetscInt *idxs,*idxsc; 2392 PetscInt i,s,*nnz; 2393 2394 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2395 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2396 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2397 /* local change of basis for pressures */ 2398 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2399 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2400 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2401 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2402 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2403 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2404 for (i=0;i<pcbddc->benign_n;i++) { 2405 PetscInt nzs,j; 2406 2407 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2408 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2409 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2410 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2411 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2412 } 2413 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2414 ierr = PetscFree(nnz);CHKERRQ(ierr); 2415 /* set identity on velocities */ 2416 for (i=0;i<n-nz;i++) { 2417 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2418 } 2419 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2420 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2421 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2422 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2423 /* set change on pressures */ 2424 for (s=0;s<pcbddc->benign_n;s++) { 2425 PetscScalar *array; 2426 PetscInt nzs; 2427 2428 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2429 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2430 for (i=0;i<nzs-1;i++) { 2431 PetscScalar vals[2]; 2432 PetscInt cols[2]; 2433 2434 cols[0] = idxs[i]; 2435 cols[1] = idxs[nzs-1]; 2436 vals[0] = 1.; 2437 vals[1] = 1.; 2438 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2439 } 2440 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2441 for (i=0;i<nzs-1;i++) array[i] = -1.; 2442 array[nzs-1] = 1.; 2443 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2444 /* store local idxs for p0 */ 2445 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2446 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2447 ierr = PetscFree(array);CHKERRQ(ierr); 2448 } 2449 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2450 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2451 /* project if needed */ 2452 if (pcbddc->benign_change_explicit) { 2453 Mat M; 2454 2455 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2456 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2457 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2458 ierr = MatDestroy(&M);CHKERRQ(ierr); 2459 } 2460 /* store global idxs for p0 */ 2461 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2462 } 2463 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2464 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2465 2466 /* determines if the coarse solver will be singular or not */ 2467 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2468 /* determines if the problem has subdomains with 0 pressure block */ 2469 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2470 *zerodiaglocal = zerodiag; 2471 PetscFunctionReturn(0); 2472 } 2473 2474 #undef __FUNCT__ 2475 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 2476 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2477 { 2478 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2479 PetscScalar *array; 2480 PetscErrorCode ierr; 2481 2482 PetscFunctionBegin; 2483 if (!pcbddc->benign_sf) { 2484 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2485 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2486 } 2487 if (get) { 2488 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2489 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2490 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2491 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2492 } else { 2493 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2494 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2495 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2496 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2497 } 2498 PetscFunctionReturn(0); 2499 } 2500 2501 #undef __FUNCT__ 2502 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 2503 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2504 { 2505 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2506 PetscErrorCode ierr; 2507 2508 PetscFunctionBegin; 2509 /* TODO: add error checking 2510 - avoid nested pop (or push) calls. 2511 - cannot push before pop. 2512 - cannot call this if pcbddc->local_mat is NULL 2513 */ 2514 if (!pcbddc->benign_n) { 2515 PetscFunctionReturn(0); 2516 } 2517 if (pop) { 2518 if (pcbddc->benign_change_explicit) { 2519 IS is_p0; 2520 MatReuse reuse; 2521 2522 /* extract B_0 */ 2523 reuse = MAT_INITIAL_MATRIX; 2524 if (pcbddc->benign_B0) { 2525 reuse = MAT_REUSE_MATRIX; 2526 } 2527 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2528 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2529 /* remove rows and cols from local problem */ 2530 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2531 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2532 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2533 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2534 } else { 2535 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2536 PetscScalar *vals; 2537 PetscInt i,n,*idxs_ins; 2538 2539 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2540 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2541 if (!pcbddc->benign_B0) { 2542 PetscInt *nnz; 2543 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2544 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2545 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2546 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2547 for (i=0;i<pcbddc->benign_n;i++) { 2548 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2549 nnz[i] = n - nnz[i]; 2550 } 2551 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2552 ierr = PetscFree(nnz);CHKERRQ(ierr); 2553 } 2554 2555 for (i=0;i<pcbddc->benign_n;i++) { 2556 PetscScalar *array; 2557 PetscInt *idxs,j,nz,cum; 2558 2559 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2560 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2561 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2562 for (j=0;j<nz;j++) vals[j] = 1.; 2563 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2564 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2565 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2566 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2567 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2568 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2569 cum = 0; 2570 for (j=0;j<n;j++) { 2571 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2572 vals[cum] = array[j]; 2573 idxs_ins[cum] = j; 2574 cum++; 2575 } 2576 } 2577 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2578 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2579 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2580 } 2581 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2582 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2583 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2584 } 2585 } else { /* push */ 2586 if (pcbddc->benign_change_explicit) { 2587 PetscInt i; 2588 2589 for (i=0;i<pcbddc->benign_n;i++) { 2590 PetscScalar *B0_vals; 2591 PetscInt *B0_cols,B0_ncol; 2592 2593 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2594 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2595 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2596 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2597 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2598 } 2599 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2600 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2601 } else { 2602 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2603 } 2604 } 2605 PetscFunctionReturn(0); 2606 } 2607 2608 #undef __FUNCT__ 2609 #define __FUNCT__ "PCBDDCAdaptiveSelection" 2610 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2611 { 2612 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2613 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2614 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2615 PetscBLASInt *B_iwork,*B_ifail; 2616 PetscScalar *work,lwork; 2617 PetscScalar *St,*S,*eigv; 2618 PetscScalar *Sarray,*Starray; 2619 PetscReal *eigs,thresh; 2620 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2621 PetscBool allocated_S_St; 2622 #if defined(PETSC_USE_COMPLEX) 2623 PetscReal *rwork; 2624 #endif 2625 PetscErrorCode ierr; 2626 2627 PetscFunctionBegin; 2628 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2629 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2630 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2631 2632 if (pcbddc->dbg_flag) { 2633 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2634 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2635 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2636 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2637 } 2638 2639 if (pcbddc->dbg_flag) { 2640 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2641 } 2642 2643 /* max size of subsets */ 2644 mss = 0; 2645 for (i=0;i<sub_schurs->n_subs;i++) { 2646 PetscInt subset_size; 2647 2648 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2649 mss = PetscMax(mss,subset_size); 2650 } 2651 2652 /* min/max and threshold */ 2653 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2654 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2655 nmax = PetscMax(nmin,nmax); 2656 allocated_S_St = PETSC_FALSE; 2657 if (nmin) { 2658 allocated_S_St = PETSC_TRUE; 2659 } 2660 2661 /* allocate lapack workspace */ 2662 cum = cum2 = 0; 2663 maxneigs = 0; 2664 for (i=0;i<sub_schurs->n_subs;i++) { 2665 PetscInt n,subset_size; 2666 2667 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2668 n = PetscMin(subset_size,nmax); 2669 cum += subset_size; 2670 cum2 += subset_size*n; 2671 maxneigs = PetscMax(maxneigs,n); 2672 } 2673 if (mss) { 2674 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2675 PetscBLASInt B_itype = 1; 2676 PetscBLASInt B_N = mss; 2677 PetscReal zero = 0.0; 2678 PetscReal eps = 0.0; /* dlamch? */ 2679 2680 B_lwork = -1; 2681 S = NULL; 2682 St = NULL; 2683 eigs = NULL; 2684 eigv = NULL; 2685 B_iwork = NULL; 2686 B_ifail = NULL; 2687 #if defined(PETSC_USE_COMPLEX) 2688 rwork = NULL; 2689 #endif 2690 thresh = 1.0; 2691 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2692 #if defined(PETSC_USE_COMPLEX) 2693 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2694 #else 2695 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2696 #endif 2697 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2698 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2699 } else { 2700 /* TODO */ 2701 } 2702 } else { 2703 lwork = 0; 2704 } 2705 2706 nv = 0; 2707 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2708 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2709 } 2710 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2711 if (allocated_S_St) { 2712 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2713 } 2714 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2715 #if defined(PETSC_USE_COMPLEX) 2716 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2717 #endif 2718 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2719 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2720 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2721 nv+cum,&pcbddc->adaptive_constraints_idxs, 2722 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2723 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2724 2725 maxneigs = 0; 2726 cum = cumarray = 0; 2727 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2728 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2729 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2730 const PetscInt *idxs; 2731 2732 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2733 for (cum=0;cum<nv;cum++) { 2734 pcbddc->adaptive_constraints_n[cum] = 1; 2735 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2736 pcbddc->adaptive_constraints_data[cum] = 1.0; 2737 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2738 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2739 } 2740 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2741 } 2742 2743 if (mss) { /* multilevel */ 2744 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2745 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2746 } 2747 2748 thresh = pcbddc->adaptive_threshold; 2749 for (i=0;i<sub_schurs->n_subs;i++) { 2750 const PetscInt *idxs; 2751 PetscReal upper,lower; 2752 PetscInt j,subset_size,eigs_start = 0; 2753 PetscBLASInt B_N; 2754 PetscBool same_data = PETSC_FALSE; 2755 2756 if (pcbddc->use_deluxe_scaling) { 2757 upper = PETSC_MAX_REAL; 2758 lower = thresh; 2759 } else { 2760 upper = 1./thresh; 2761 lower = 0.; 2762 } 2763 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2764 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2765 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2766 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2767 if (sub_schurs->is_hermitian) { 2768 PetscInt j,k; 2769 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2770 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2771 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2772 } 2773 for (j=0;j<subset_size;j++) { 2774 for (k=j;k<subset_size;k++) { 2775 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2776 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2777 } 2778 } 2779 } else { 2780 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2781 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2782 } 2783 } else { 2784 S = Sarray + cumarray; 2785 St = Starray + cumarray; 2786 } 2787 /* see if we can save some work */ 2788 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2789 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2790 } 2791 2792 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2793 B_neigs = 0; 2794 } else { 2795 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2796 PetscBLASInt B_itype = 1; 2797 PetscBLASInt B_IL, B_IU; 2798 PetscReal eps = -1.0; /* dlamch? */ 2799 PetscInt nmin_s; 2800 PetscBool compute_range = PETSC_FALSE; 2801 2802 if (pcbddc->dbg_flag) { 2803 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 2804 } 2805 2806 compute_range = PETSC_FALSE; 2807 if (thresh > 1.+PETSC_SMALL && !same_data) { 2808 compute_range = PETSC_TRUE; 2809 } 2810 2811 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2812 if (compute_range) { 2813 2814 /* ask for eigenvalues larger than thresh */ 2815 #if defined(PETSC_USE_COMPLEX) 2816 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2817 #else 2818 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2819 #endif 2820 } else if (!same_data) { 2821 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2822 B_IL = 1; 2823 #if defined(PETSC_USE_COMPLEX) 2824 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2825 #else 2826 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2827 #endif 2828 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 2829 PetscInt k; 2830 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2831 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2832 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2833 nmin = nmax; 2834 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2835 for (k=0;k<nmax;k++) { 2836 eigs[k] = 1./PETSC_SMALL; 2837 eigv[k*(subset_size+1)] = 1.0; 2838 } 2839 } 2840 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2841 if (B_ierr) { 2842 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2843 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2844 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2845 } 2846 2847 if (B_neigs > nmax) { 2848 if (pcbddc->dbg_flag) { 2849 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 2850 } 2851 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 2852 B_neigs = nmax; 2853 } 2854 2855 nmin_s = PetscMin(nmin,B_N); 2856 if (B_neigs < nmin_s) { 2857 PetscBLASInt B_neigs2; 2858 2859 if (pcbddc->use_deluxe_scaling) { 2860 B_IL = B_N - nmin_s + 1; 2861 B_IU = B_N - B_neigs; 2862 } else { 2863 B_IL = B_neigs + 1; 2864 B_IU = nmin_s; 2865 } 2866 if (pcbddc->dbg_flag) { 2867 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 2868 } 2869 if (sub_schurs->is_hermitian) { 2870 PetscInt j,k; 2871 for (j=0;j<subset_size;j++) { 2872 for (k=j;k<subset_size;k++) { 2873 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2874 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2875 } 2876 } 2877 } else { 2878 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2879 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2880 } 2881 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2882 #if defined(PETSC_USE_COMPLEX) 2883 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2884 #else 2885 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2886 #endif 2887 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2888 B_neigs += B_neigs2; 2889 } 2890 if (B_ierr) { 2891 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2892 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2893 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2894 } 2895 if (pcbddc->dbg_flag) { 2896 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 2897 for (j=0;j<B_neigs;j++) { 2898 if (eigs[j] == 0.0) { 2899 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 2900 } else { 2901 if (pcbddc->use_deluxe_scaling) { 2902 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 2903 } else { 2904 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 2905 } 2906 } 2907 } 2908 } 2909 } else { 2910 /* TODO */ 2911 } 2912 } 2913 /* change the basis back to the original one */ 2914 if (sub_schurs->change) { 2915 Mat change,phi,phit; 2916 2917 if (pcbddc->dbg_flag > 1) { 2918 PetscInt ii; 2919 for (ii=0;ii<B_neigs;ii++) { 2920 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2921 for (j=0;j<B_N;j++) { 2922 #if defined(PETSC_USE_COMPLEX) 2923 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 2924 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 2925 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2926 #else 2927 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 2928 #endif 2929 } 2930 } 2931 } 2932 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 2933 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 2934 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 2935 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2936 ierr = MatDestroy(&phit);CHKERRQ(ierr); 2937 ierr = MatDestroy(&phi);CHKERRQ(ierr); 2938 } 2939 maxneigs = PetscMax(B_neigs,maxneigs); 2940 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 2941 if (B_neigs) { 2942 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2943 2944 if (pcbddc->dbg_flag > 1) { 2945 PetscInt ii; 2946 for (ii=0;ii<B_neigs;ii++) { 2947 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2948 for (j=0;j<B_N;j++) { 2949 #if defined(PETSC_USE_COMPLEX) 2950 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2951 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2952 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2953 #else 2954 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 2955 #endif 2956 } 2957 } 2958 } 2959 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 2960 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 2961 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 2962 cum++; 2963 } 2964 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2965 /* shift for next computation */ 2966 cumarray += subset_size*subset_size; 2967 } 2968 if (pcbddc->dbg_flag) { 2969 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2970 } 2971 2972 if (mss) { 2973 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2974 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2975 /* destroy matrices (junk) */ 2976 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 2977 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 2978 } 2979 if (allocated_S_St) { 2980 ierr = PetscFree2(S,St);CHKERRQ(ierr); 2981 } 2982 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 2983 #if defined(PETSC_USE_COMPLEX) 2984 ierr = PetscFree(rwork);CHKERRQ(ierr); 2985 #endif 2986 if (pcbddc->dbg_flag) { 2987 PetscInt maxneigs_r; 2988 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2989 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 2990 } 2991 PetscFunctionReturn(0); 2992 } 2993 2994 #undef __FUNCT__ 2995 #define __FUNCT__ "PCBDDCSetUpSolvers" 2996 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 2997 { 2998 PetscScalar *coarse_submat_vals; 2999 PetscErrorCode ierr; 3000 3001 PetscFunctionBegin; 3002 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3003 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3004 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3005 3006 /* Setup local neumann solver ksp_R */ 3007 /* PCBDDCSetUpLocalScatters should be called first! */ 3008 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3009 3010 /* 3011 Setup local correction and local part of coarse basis. 3012 Gives back the dense local part of the coarse matrix in column major ordering 3013 */ 3014 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3015 3016 /* Compute total number of coarse nodes and setup coarse solver */ 3017 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3018 3019 /* free */ 3020 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3021 PetscFunctionReturn(0); 3022 } 3023 3024 #undef __FUNCT__ 3025 #define __FUNCT__ "PCBDDCResetCustomization" 3026 PetscErrorCode PCBDDCResetCustomization(PC pc) 3027 { 3028 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3029 PetscErrorCode ierr; 3030 3031 PetscFunctionBegin; 3032 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3033 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3034 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3035 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3036 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3037 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3038 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3039 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3040 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3041 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3042 PetscFunctionReturn(0); 3043 } 3044 3045 #undef __FUNCT__ 3046 #define __FUNCT__ "PCBDDCResetTopography" 3047 PetscErrorCode PCBDDCResetTopography(PC pc) 3048 { 3049 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3050 PetscInt i; 3051 PetscErrorCode ierr; 3052 3053 PetscFunctionBegin; 3054 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3055 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3056 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3057 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3058 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3059 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3060 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3061 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3062 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3063 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3064 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3065 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3066 for (i=0;i<pcbddc->n_local_subs;i++) { 3067 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3068 } 3069 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3070 if (pcbddc->sub_schurs) { 3071 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 3072 } 3073 pcbddc->graphanalyzed = PETSC_FALSE; 3074 pcbddc->recompute_topography = PETSC_TRUE; 3075 PetscFunctionReturn(0); 3076 } 3077 3078 #undef __FUNCT__ 3079 #define __FUNCT__ "PCBDDCResetSolvers" 3080 PetscErrorCode PCBDDCResetSolvers(PC pc) 3081 { 3082 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3083 PetscErrorCode ierr; 3084 3085 PetscFunctionBegin; 3086 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3087 if (pcbddc->coarse_phi_B) { 3088 PetscScalar *array; 3089 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3090 ierr = PetscFree(array);CHKERRQ(ierr); 3091 } 3092 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3093 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3094 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3095 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3096 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3097 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3098 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3099 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3100 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3101 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3102 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3103 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3104 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3105 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3106 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 3107 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 3108 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 3109 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3110 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3111 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3112 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3113 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3114 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3115 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3116 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3117 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3118 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3119 if (pcbddc->benign_zerodiag_subs) { 3120 PetscInt i; 3121 for (i=0;i<pcbddc->benign_n;i++) { 3122 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3123 } 3124 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3125 } 3126 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3127 PetscFunctionReturn(0); 3128 } 3129 3130 #undef __FUNCT__ 3131 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 3132 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3133 { 3134 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3135 PC_IS *pcis = (PC_IS*)pc->data; 3136 VecType impVecType; 3137 PetscInt n_constraints,n_R,old_size; 3138 PetscErrorCode ierr; 3139 3140 PetscFunctionBegin; 3141 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 3142 /* get sizes */ 3143 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3144 n_R = pcis->n - pcbddc->n_vertices; 3145 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3146 /* local work vectors (try to avoid unneeded work)*/ 3147 /* R nodes */ 3148 old_size = -1; 3149 if (pcbddc->vec1_R) { 3150 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3151 } 3152 if (n_R != old_size) { 3153 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3154 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3155 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3156 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3157 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3158 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3159 } 3160 /* local primal dofs */ 3161 old_size = -1; 3162 if (pcbddc->vec1_P) { 3163 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3164 } 3165 if (pcbddc->local_primal_size != old_size) { 3166 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3167 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3168 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3169 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3170 } 3171 /* local explicit constraints */ 3172 old_size = -1; 3173 if (pcbddc->vec1_C) { 3174 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3175 } 3176 if (n_constraints && n_constraints != old_size) { 3177 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3178 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3179 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3180 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3181 } 3182 PetscFunctionReturn(0); 3183 } 3184 3185 #undef __FUNCT__ 3186 #define __FUNCT__ "PCBDDCSetUpCorrection" 3187 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3188 { 3189 PetscErrorCode ierr; 3190 /* pointers to pcis and pcbddc */ 3191 PC_IS* pcis = (PC_IS*)pc->data; 3192 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3193 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3194 /* submatrices of local problem */ 3195 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3196 /* submatrices of local coarse problem */ 3197 Mat S_VV,S_CV,S_VC,S_CC; 3198 /* working matrices */ 3199 Mat C_CR; 3200 /* additional working stuff */ 3201 PC pc_R; 3202 Mat F; 3203 Vec dummy_vec; 3204 PetscBool isLU,isCHOL,isILU,need_benign_correction; 3205 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3206 PetscScalar *work; 3207 PetscInt *idx_V_B; 3208 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3209 PetscInt i,n_R,n_D,n_B; 3210 3211 /* some shortcuts to scalars */ 3212 PetscScalar one=1.0,m_one=-1.0; 3213 3214 PetscFunctionBegin; 3215 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3216 3217 /* Set Non-overlapping dimensions */ 3218 n_vertices = pcbddc->n_vertices; 3219 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3220 n_B = pcis->n_B; 3221 n_D = pcis->n - n_B; 3222 n_R = pcis->n - n_vertices; 3223 3224 /* vertices in boundary numbering */ 3225 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3226 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3227 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3228 3229 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3230 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3231 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3232 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3233 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3234 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3235 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3236 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3237 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3238 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3239 3240 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3241 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3242 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3243 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3244 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3245 lda_rhs = n_R; 3246 need_benign_correction = PETSC_FALSE; 3247 if (isLU || isILU || isCHOL) { 3248 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3249 } else if (sub_schurs && sub_schurs->reuse_solver) { 3250 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3251 MatFactorType type; 3252 3253 F = reuse_solver->F; 3254 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3255 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3256 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3257 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3258 } else { 3259 F = NULL; 3260 } 3261 3262 /* allocate workspace */ 3263 n = 0; 3264 if (n_constraints) { 3265 n += lda_rhs*n_constraints; 3266 } 3267 if (n_vertices) { 3268 n = PetscMax(2*lda_rhs*n_vertices,n); 3269 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3270 } 3271 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3272 3273 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3274 dummy_vec = NULL; 3275 if (need_benign_correction && lda_rhs != n_R && F) { 3276 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3277 } 3278 3279 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3280 if (n_constraints) { 3281 Mat M1,M2,M3,C_B; 3282 IS is_aux; 3283 PetscScalar *array,*array2; 3284 3285 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3286 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3287 3288 /* Extract constraints on R nodes: C_{CR} */ 3289 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3290 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3291 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3292 3293 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3294 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3295 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3296 for (i=0;i<n_constraints;i++) { 3297 const PetscScalar *row_cmat_values; 3298 const PetscInt *row_cmat_indices; 3299 PetscInt size_of_constraint,j; 3300 3301 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3302 for (j=0;j<size_of_constraint;j++) { 3303 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3304 } 3305 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3306 } 3307 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3308 if (F) { 3309 Mat B; 3310 3311 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3312 if (need_benign_correction) { 3313 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3314 3315 /* rhs is already zero on interior dofs, no need to change the rhs */ 3316 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3317 } 3318 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 3319 if (need_benign_correction) { 3320 PetscScalar *marr; 3321 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3322 3323 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3324 if (lda_rhs != n_R) { 3325 for (i=0;i<n_constraints;i++) { 3326 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3327 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3328 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3329 } 3330 } else { 3331 for (i=0;i<n_constraints;i++) { 3332 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3333 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3334 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3335 } 3336 } 3337 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3338 } 3339 ierr = MatDestroy(&B);CHKERRQ(ierr); 3340 } else { 3341 PetscScalar *marr; 3342 3343 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3344 for (i=0;i<n_constraints;i++) { 3345 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3346 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3347 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3348 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3349 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3350 } 3351 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3352 } 3353 if (!pcbddc->switch_static) { 3354 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3355 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3356 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3357 for (i=0;i<n_constraints;i++) { 3358 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3359 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3360 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3361 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3362 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3363 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3364 } 3365 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3366 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3367 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3368 } else { 3369 if (lda_rhs != n_R) { 3370 IS dummy; 3371 3372 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3373 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3374 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3375 } else { 3376 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3377 pcbddc->local_auxmat2 = local_auxmat2_R; 3378 } 3379 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3380 } 3381 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3382 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3383 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3384 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3385 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3386 if (isCHOL) { 3387 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3388 } else { 3389 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3390 } 3391 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3392 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3393 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3394 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3395 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3396 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3397 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3398 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3399 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3400 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3401 } 3402 3403 /* Get submatrices from subdomain matrix */ 3404 if (n_vertices) { 3405 IS is_aux; 3406 3407 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3408 IS tis; 3409 3410 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3411 ierr = ISSort(tis);CHKERRQ(ierr); 3412 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3413 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3414 } else { 3415 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3416 } 3417 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3418 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3419 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3420 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3421 } 3422 3423 /* Matrix of coarse basis functions (local) */ 3424 if (pcbddc->coarse_phi_B) { 3425 PetscInt on_B,on_primal,on_D=n_D; 3426 if (pcbddc->coarse_phi_D) { 3427 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3428 } 3429 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3430 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3431 PetscScalar *marray; 3432 3433 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3434 ierr = PetscFree(marray);CHKERRQ(ierr); 3435 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3436 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3437 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3438 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3439 } 3440 } 3441 3442 if (!pcbddc->coarse_phi_B) { 3443 PetscScalar *marray; 3444 3445 n = n_B*pcbddc->local_primal_size; 3446 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3447 n += n_D*pcbddc->local_primal_size; 3448 } 3449 if (!pcbddc->symmetric_primal) { 3450 n *= 2; 3451 } 3452 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 3453 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3454 n = n_B*pcbddc->local_primal_size; 3455 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3456 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3457 n += n_D*pcbddc->local_primal_size; 3458 } 3459 if (!pcbddc->symmetric_primal) { 3460 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3461 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3462 n = n_B*pcbddc->local_primal_size; 3463 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3464 } 3465 } else { 3466 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3467 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3468 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3469 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3470 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3471 } 3472 } 3473 } 3474 3475 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3476 p0_lidx_I = NULL; 3477 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3478 const PetscInt *idxs; 3479 3480 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3481 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3482 for (i=0;i<pcbddc->benign_n;i++) { 3483 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3484 } 3485 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3486 } 3487 3488 /* vertices */ 3489 if (n_vertices) { 3490 3491 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3492 3493 if (n_R) { 3494 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3495 PetscBLASInt B_N,B_one = 1; 3496 PetscScalar *x,*y; 3497 PetscBool isseqaij; 3498 3499 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3500 if (need_benign_correction) { 3501 ISLocalToGlobalMapping RtoN; 3502 IS is_p0; 3503 PetscInt *idxs_p0,n; 3504 3505 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3506 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3507 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3508 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3509 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3510 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3511 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3512 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3513 } 3514 3515 if (lda_rhs == n_R) { 3516 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3517 } else { 3518 PetscScalar *av,*array; 3519 const PetscInt *xadj,*adjncy; 3520 PetscInt n; 3521 PetscBool flg_row; 3522 3523 array = work+lda_rhs*n_vertices; 3524 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3525 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3526 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3527 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3528 for (i=0;i<n;i++) { 3529 PetscInt j; 3530 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3531 } 3532 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3533 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3534 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3535 } 3536 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3537 if (need_benign_correction) { 3538 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3539 PetscScalar *marr; 3540 3541 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3542 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3543 3544 | 0 0 0 | (V) 3545 L = | 0 0 -1 | (P-p0) 3546 | 0 0 -1 | (p0) 3547 3548 */ 3549 for (i=0;i<reuse_solver->benign_n;i++) { 3550 const PetscScalar *vals; 3551 const PetscInt *idxs,*idxs_zero; 3552 PetscInt n,j,nz; 3553 3554 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3555 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3556 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3557 for (j=0;j<n;j++) { 3558 PetscScalar val = vals[j]; 3559 PetscInt k,col = idxs[j]; 3560 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3561 } 3562 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3563 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3564 } 3565 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3566 } 3567 if (F) { 3568 /* need to correct the rhs */ 3569 if (need_benign_correction) { 3570 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3571 PetscScalar *marr; 3572 3573 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3574 if (lda_rhs != n_R) { 3575 for (i=0;i<n_vertices;i++) { 3576 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3577 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3578 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3579 } 3580 } else { 3581 for (i=0;i<n_vertices;i++) { 3582 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3583 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3584 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3585 } 3586 } 3587 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3588 } 3589 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3590 /* need to correct the solution */ 3591 if (need_benign_correction) { 3592 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3593 PetscScalar *marr; 3594 3595 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3596 if (lda_rhs != n_R) { 3597 for (i=0;i<n_vertices;i++) { 3598 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3599 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3600 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3601 } 3602 } else { 3603 for (i=0;i<n_vertices;i++) { 3604 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3605 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3606 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3607 } 3608 } 3609 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3610 } 3611 } else { 3612 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3613 for (i=0;i<n_vertices;i++) { 3614 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3615 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3616 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3617 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3618 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3619 } 3620 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3621 } 3622 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3623 /* S_VV and S_CV */ 3624 if (n_constraints) { 3625 Mat B; 3626 3627 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3628 for (i=0;i<n_vertices;i++) { 3629 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3630 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3631 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3632 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3633 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3634 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3635 } 3636 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3637 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3638 ierr = MatDestroy(&B);CHKERRQ(ierr); 3639 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3640 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3641 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3642 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3643 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3644 ierr = MatDestroy(&B);CHKERRQ(ierr); 3645 } 3646 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3647 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3648 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3649 } 3650 if (lda_rhs != n_R) { 3651 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3652 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3653 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3654 } 3655 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3656 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3657 if (need_benign_correction) { 3658 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3659 PetscScalar *marr,*sums; 3660 3661 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3662 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 3663 for (i=0;i<reuse_solver->benign_n;i++) { 3664 const PetscScalar *vals; 3665 const PetscInt *idxs,*idxs_zero; 3666 PetscInt n,j,nz; 3667 3668 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3669 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3670 for (j=0;j<n_vertices;j++) { 3671 PetscInt k; 3672 sums[j] = 0.; 3673 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3674 } 3675 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3676 for (j=0;j<n;j++) { 3677 PetscScalar val = vals[j]; 3678 PetscInt k; 3679 for (k=0;k<n_vertices;k++) { 3680 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3681 } 3682 } 3683 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3684 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3685 } 3686 ierr = PetscFree(sums);CHKERRQ(ierr); 3687 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 3688 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3689 } 3690 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3691 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3692 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3693 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3694 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3695 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3696 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3697 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3698 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3699 } else { 3700 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3701 } 3702 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3703 3704 /* coarse basis functions */ 3705 for (i=0;i<n_vertices;i++) { 3706 PetscScalar *y; 3707 3708 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3709 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3710 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3711 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3712 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3713 y[n_B*i+idx_V_B[i]] = 1.0; 3714 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3715 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3716 3717 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3718 PetscInt j; 3719 3720 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3721 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3722 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3723 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3724 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3725 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3726 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3727 } 3728 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3729 } 3730 /* if n_R == 0 the object is not destroyed */ 3731 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3732 } 3733 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3734 3735 if (n_constraints) { 3736 Mat B; 3737 3738 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3739 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3740 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3741 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3742 if (n_vertices) { 3743 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3744 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3745 } else { 3746 Mat S_VCt; 3747 3748 if (lda_rhs != n_R) { 3749 ierr = MatDestroy(&B);CHKERRQ(ierr); 3750 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3751 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3752 } 3753 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3754 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3755 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3756 } 3757 } 3758 ierr = MatDestroy(&B);CHKERRQ(ierr); 3759 /* coarse basis functions */ 3760 for (i=0;i<n_constraints;i++) { 3761 PetscScalar *y; 3762 3763 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3764 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3765 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3766 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3767 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3768 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3769 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3770 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3771 PetscInt j; 3772 3773 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3774 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3775 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3776 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3777 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3778 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3779 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3780 } 3781 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3782 } 3783 } 3784 if (n_constraints) { 3785 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3786 } 3787 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3788 3789 /* coarse matrix entries relative to B_0 */ 3790 if (pcbddc->benign_n) { 3791 Mat B0_B,B0_BPHI; 3792 IS is_dummy; 3793 PetscScalar *data; 3794 PetscInt j; 3795 3796 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3797 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3798 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3799 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3800 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3801 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3802 for (j=0;j<pcbddc->benign_n;j++) { 3803 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3804 for (i=0;i<pcbddc->local_primal_size;i++) { 3805 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3806 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3807 } 3808 } 3809 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3810 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3811 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3812 } 3813 3814 /* compute other basis functions for non-symmetric problems */ 3815 if (!pcbddc->symmetric_primal) { 3816 Mat B_V=NULL,B_C=NULL; 3817 PetscScalar *marray; 3818 3819 if (n_constraints) { 3820 Mat S_CCT,C_CRT; 3821 3822 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 3823 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 3824 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 3825 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 3826 if (n_vertices) { 3827 Mat S_VCT; 3828 3829 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 3830 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 3831 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 3832 } 3833 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 3834 } else { 3835 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 3836 } 3837 if (n_vertices && n_R) { 3838 PetscScalar *av,*marray; 3839 const PetscInt *xadj,*adjncy; 3840 PetscInt n; 3841 PetscBool flg_row; 3842 3843 /* B_V = B_V - A_VR^T */ 3844 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3845 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3846 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 3847 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3848 for (i=0;i<n;i++) { 3849 PetscInt j; 3850 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 3851 } 3852 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3853 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3854 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3855 } 3856 3857 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 3858 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3859 for (i=0;i<n_vertices;i++) { 3860 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 3861 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3862 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3863 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3864 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3865 } 3866 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3867 if (B_C) { 3868 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 3869 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 3870 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 3871 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3872 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3873 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3874 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3875 } 3876 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 3877 } 3878 /* coarse basis functions */ 3879 for (i=0;i<pcbddc->local_primal_size;i++) { 3880 PetscScalar *y; 3881 3882 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 3883 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3884 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3885 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3886 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3887 if (i<n_vertices) { 3888 y[n_B*i+idx_V_B[i]] = 1.0; 3889 } 3890 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3891 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3892 3893 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3894 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3895 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3896 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3897 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3898 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3899 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3900 } 3901 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3902 } 3903 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 3904 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 3905 } 3906 /* free memory */ 3907 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3908 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 3909 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 3910 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 3911 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 3912 ierr = PetscFree(work);CHKERRQ(ierr); 3913 if (n_vertices) { 3914 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3915 } 3916 if (n_constraints) { 3917 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3918 } 3919 /* Checking coarse_sub_mat and coarse basis functios */ 3920 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3921 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3922 if (pcbddc->dbg_flag) { 3923 Mat coarse_sub_mat; 3924 Mat AUXMAT,TM1,TM2,TM3,TM4; 3925 Mat coarse_phi_D,coarse_phi_B; 3926 Mat coarse_psi_D,coarse_psi_B; 3927 Mat A_II,A_BB,A_IB,A_BI; 3928 Mat C_B,CPHI; 3929 IS is_dummy; 3930 Vec mones; 3931 MatType checkmattype=MATSEQAIJ; 3932 PetscReal real_value; 3933 3934 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3935 Mat A; 3936 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 3937 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3938 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3939 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3940 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3941 ierr = MatDestroy(&A);CHKERRQ(ierr); 3942 } else { 3943 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3944 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3945 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3946 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3947 } 3948 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3949 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3950 if (!pcbddc->symmetric_primal) { 3951 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 3952 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 3953 } 3954 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3955 3956 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3957 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 3958 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3959 if (!pcbddc->symmetric_primal) { 3960 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3961 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3962 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3963 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3964 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3965 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3966 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3967 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3968 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3969 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3970 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3971 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3972 } else { 3973 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3974 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3975 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3976 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3977 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3978 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3979 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3980 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3981 } 3982 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3983 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3984 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3985 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 3986 if (pcbddc->benign_n) { 3987 Mat B0_B,B0_BPHI; 3988 PetscScalar *data,*data2; 3989 PetscInt j; 3990 3991 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3992 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3993 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3994 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3995 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 3996 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 3997 for (j=0;j<pcbddc->benign_n;j++) { 3998 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3999 for (i=0;i<pcbddc->local_primal_size;i++) { 4000 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4001 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4002 } 4003 } 4004 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4005 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4006 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4007 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4008 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4009 } 4010 #if 0 4011 { 4012 PetscViewer viewer; 4013 char filename[256]; 4014 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4015 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4016 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4017 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4018 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4019 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4020 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4021 if (save_change) { 4022 Mat phi_B; 4023 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 4024 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 4025 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 4026 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 4027 } else { 4028 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4029 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4030 } 4031 if (pcbddc->coarse_phi_D) { 4032 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4033 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4034 } 4035 if (pcbddc->coarse_psi_B) { 4036 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4037 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4038 } 4039 if (pcbddc->coarse_psi_D) { 4040 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4041 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4042 } 4043 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4044 } 4045 #endif 4046 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4047 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4048 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4049 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4050 4051 /* check constraints */ 4052 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4053 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4054 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4055 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4056 } else { 4057 PetscScalar *data; 4058 Mat tmat; 4059 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4060 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4061 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4062 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4063 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4064 } 4065 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4066 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4067 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4068 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4069 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4070 if (!pcbddc->symmetric_primal) { 4071 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4072 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4073 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4074 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4075 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4076 } 4077 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4078 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4079 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4080 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4081 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4082 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4083 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4084 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4085 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4086 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4087 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4088 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4089 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4090 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4091 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4092 if (!pcbddc->symmetric_primal) { 4093 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4094 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4095 } 4096 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4097 } 4098 /* get back data */ 4099 *coarse_submat_vals_n = coarse_submat_vals; 4100 PetscFunctionReturn(0); 4101 } 4102 4103 #undef __FUNCT__ 4104 #define __FUNCT__ "MatGetSubMatrixUnsorted" 4105 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4106 { 4107 Mat *work_mat; 4108 IS isrow_s,iscol_s; 4109 PetscBool rsorted,csorted; 4110 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4111 PetscErrorCode ierr; 4112 4113 PetscFunctionBegin; 4114 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4115 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4116 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4117 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4118 4119 if (!rsorted) { 4120 const PetscInt *idxs; 4121 PetscInt *idxs_sorted,i; 4122 4123 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4124 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4125 for (i=0;i<rsize;i++) { 4126 idxs_perm_r[i] = i; 4127 } 4128 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4129 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4130 for (i=0;i<rsize;i++) { 4131 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4132 } 4133 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4134 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4135 } else { 4136 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4137 isrow_s = isrow; 4138 } 4139 4140 if (!csorted) { 4141 if (isrow == iscol) { 4142 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4143 iscol_s = isrow_s; 4144 } else { 4145 const PetscInt *idxs; 4146 PetscInt *idxs_sorted,i; 4147 4148 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4149 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4150 for (i=0;i<csize;i++) { 4151 idxs_perm_c[i] = i; 4152 } 4153 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4154 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4155 for (i=0;i<csize;i++) { 4156 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4157 } 4158 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4159 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4160 } 4161 } else { 4162 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4163 iscol_s = iscol; 4164 } 4165 4166 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4167 4168 if (!rsorted || !csorted) { 4169 Mat new_mat; 4170 IS is_perm_r,is_perm_c; 4171 4172 if (!rsorted) { 4173 PetscInt *idxs_r,i; 4174 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4175 for (i=0;i<rsize;i++) { 4176 idxs_r[idxs_perm_r[i]] = i; 4177 } 4178 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4179 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4180 } else { 4181 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4182 } 4183 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4184 4185 if (!csorted) { 4186 if (isrow_s == iscol_s) { 4187 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4188 is_perm_c = is_perm_r; 4189 } else { 4190 PetscInt *idxs_c,i; 4191 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4192 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4193 for (i=0;i<csize;i++) { 4194 idxs_c[idxs_perm_c[i]] = i; 4195 } 4196 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4197 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4198 } 4199 } else { 4200 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4201 } 4202 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4203 4204 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4205 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4206 work_mat[0] = new_mat; 4207 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4208 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4209 } 4210 4211 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4212 *B = work_mat[0]; 4213 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4214 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4215 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4216 PetscFunctionReturn(0); 4217 } 4218 4219 #undef __FUNCT__ 4220 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 4221 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4222 { 4223 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4224 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4225 Mat new_mat; 4226 IS is_local,is_global; 4227 PetscInt local_size; 4228 PetscBool isseqaij; 4229 PetscErrorCode ierr; 4230 4231 PetscFunctionBegin; 4232 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4233 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4234 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4235 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4236 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4237 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4238 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4239 4240 /* check */ 4241 if (pcbddc->dbg_flag) { 4242 Vec x,x_change; 4243 PetscReal error; 4244 4245 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4246 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4247 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4248 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4249 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4250 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4251 if (!pcbddc->change_interior) { 4252 const PetscScalar *x,*y,*v; 4253 PetscReal lerror = 0.; 4254 PetscInt i; 4255 4256 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4257 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4258 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4259 for (i=0;i<local_size;i++) 4260 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4261 lerror = PetscAbsScalar(x[i]-y[i]); 4262 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4263 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4264 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4265 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4266 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on I: %1.6e\n",error);CHKERRQ(ierr); 4267 } 4268 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4269 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4270 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4271 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4272 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4273 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 4274 ierr = VecDestroy(&x);CHKERRQ(ierr); 4275 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4276 } 4277 4278 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4279 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4280 if (isseqaij) { 4281 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4282 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4283 } else { 4284 Mat work_mat; 4285 4286 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4287 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4288 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4289 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4290 } 4291 if (matis->A->symmetric_set) { 4292 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4293 #if !defined(PETSC_USE_COMPLEX) 4294 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4295 #endif 4296 } 4297 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4298 PetscFunctionReturn(0); 4299 } 4300 4301 #undef __FUNCT__ 4302 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 4303 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4304 { 4305 PC_IS* pcis = (PC_IS*)(pc->data); 4306 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4307 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4308 PetscInt *idx_R_local=NULL; 4309 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4310 PetscInt vbs,bs; 4311 PetscBT bitmask=NULL; 4312 PetscErrorCode ierr; 4313 4314 PetscFunctionBegin; 4315 /* 4316 No need to setup local scatters if 4317 - primal space is unchanged 4318 AND 4319 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4320 AND 4321 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4322 */ 4323 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4324 PetscFunctionReturn(0); 4325 } 4326 /* destroy old objects */ 4327 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4328 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4329 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4330 /* Set Non-overlapping dimensions */ 4331 n_B = pcis->n_B; 4332 n_D = pcis->n - n_B; 4333 n_vertices = pcbddc->n_vertices; 4334 4335 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4336 4337 /* create auxiliary bitmask and allocate workspace */ 4338 if (!sub_schurs || !sub_schurs->reuse_solver) { 4339 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4340 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4341 for (i=0;i<n_vertices;i++) { 4342 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4343 } 4344 4345 for (i=0, n_R=0; i<pcis->n; i++) { 4346 if (!PetscBTLookup(bitmask,i)) { 4347 idx_R_local[n_R++] = i; 4348 } 4349 } 4350 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4351 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4352 4353 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4354 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4355 } 4356 4357 /* Block code */ 4358 vbs = 1; 4359 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4360 if (bs>1 && !(n_vertices%bs)) { 4361 PetscBool is_blocked = PETSC_TRUE; 4362 PetscInt *vary; 4363 if (!sub_schurs || !sub_schurs->reuse_solver) { 4364 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4365 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4366 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4367 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4368 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4369 for (i=0; i<pcis->n/bs; i++) { 4370 if (vary[i]!=0 && vary[i]!=bs) { 4371 is_blocked = PETSC_FALSE; 4372 break; 4373 } 4374 } 4375 ierr = PetscFree(vary);CHKERRQ(ierr); 4376 } else { 4377 /* Verify directly the R set */ 4378 for (i=0; i<n_R/bs; i++) { 4379 PetscInt j,node=idx_R_local[bs*i]; 4380 for (j=1; j<bs; j++) { 4381 if (node != idx_R_local[bs*i+j]-j) { 4382 is_blocked = PETSC_FALSE; 4383 break; 4384 } 4385 } 4386 } 4387 } 4388 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4389 vbs = bs; 4390 for (i=0;i<n_R/vbs;i++) { 4391 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4392 } 4393 } 4394 } 4395 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4396 if (sub_schurs && sub_schurs->reuse_solver) { 4397 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4398 4399 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4400 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4401 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4402 reuse_solver->is_R = pcbddc->is_R_local; 4403 } else { 4404 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4405 } 4406 4407 /* print some info if requested */ 4408 if (pcbddc->dbg_flag) { 4409 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4410 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4411 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4412 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4413 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4414 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4415 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4416 } 4417 4418 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4419 if (!sub_schurs || !sub_schurs->reuse_solver) { 4420 IS is_aux1,is_aux2; 4421 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4422 4423 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4424 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4425 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4426 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4427 for (i=0; i<n_D; i++) { 4428 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4429 } 4430 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4431 for (i=0, j=0; i<n_R; i++) { 4432 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4433 aux_array1[j++] = i; 4434 } 4435 } 4436 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4437 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4438 for (i=0, j=0; i<n_B; i++) { 4439 if (!PetscBTLookup(bitmask,is_indices[i])) { 4440 aux_array2[j++] = i; 4441 } 4442 } 4443 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4444 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4445 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4446 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4447 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4448 4449 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4450 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4451 for (i=0, j=0; i<n_R; i++) { 4452 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4453 aux_array1[j++] = i; 4454 } 4455 } 4456 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4457 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4458 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4459 } 4460 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4461 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4462 } else { 4463 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4464 IS tis; 4465 PetscInt schur_size; 4466 4467 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4468 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4469 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4470 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4471 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4472 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4473 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4474 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4475 } 4476 } 4477 PetscFunctionReturn(0); 4478 } 4479 4480 4481 #undef __FUNCT__ 4482 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 4483 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4484 { 4485 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4486 PC_IS *pcis = (PC_IS*)pc->data; 4487 PC pc_temp; 4488 Mat A_RR; 4489 MatReuse reuse; 4490 PetscScalar m_one = -1.0; 4491 PetscReal value; 4492 PetscInt n_D,n_R; 4493 PetscBool check_corr[2],issbaij; 4494 PetscErrorCode ierr; 4495 /* prefixes stuff */ 4496 char dir_prefix[256],neu_prefix[256],str_level[16]; 4497 size_t len; 4498 4499 PetscFunctionBegin; 4500 4501 /* compute prefixes */ 4502 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4503 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4504 if (!pcbddc->current_level) { 4505 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4506 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4507 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4508 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4509 } else { 4510 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4511 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4512 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4513 len -= 15; /* remove "pc_bddc_coarse_" */ 4514 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4515 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4516 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4517 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4518 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4519 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4520 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4521 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4522 } 4523 4524 /* DIRICHLET PROBLEM */ 4525 if (dirichlet) { 4526 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4527 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4528 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4529 if (pcbddc->dbg_flag) { 4530 Mat A_IIn; 4531 4532 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4533 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4534 pcis->A_II = A_IIn; 4535 } 4536 } 4537 if (pcbddc->local_mat->symmetric_set) { 4538 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4539 } 4540 /* Matrix for Dirichlet problem is pcis->A_II */ 4541 n_D = pcis->n - pcis->n_B; 4542 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4543 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4544 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4545 /* default */ 4546 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4547 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4548 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4549 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4550 if (issbaij) { 4551 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4552 } else { 4553 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4554 } 4555 /* Allow user's customization */ 4556 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4557 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4558 } 4559 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4560 if (sub_schurs && sub_schurs->reuse_solver) { 4561 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4562 4563 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4564 } 4565 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4566 if (!n_D) { 4567 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4568 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4569 } 4570 /* Set Up KSP for Dirichlet problem of BDDC */ 4571 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4572 /* set ksp_D into pcis data */ 4573 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4574 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4575 pcis->ksp_D = pcbddc->ksp_D; 4576 } 4577 4578 /* NEUMANN PROBLEM */ 4579 A_RR = 0; 4580 if (neumann) { 4581 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4582 PetscInt ibs,mbs; 4583 PetscBool issbaij; 4584 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4585 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4586 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4587 if (pcbddc->ksp_R) { /* already created ksp */ 4588 PetscInt nn_R; 4589 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4590 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4591 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4592 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4593 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4594 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4595 reuse = MAT_INITIAL_MATRIX; 4596 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4597 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4598 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4599 reuse = MAT_INITIAL_MATRIX; 4600 } else { /* safe to reuse the matrix */ 4601 reuse = MAT_REUSE_MATRIX; 4602 } 4603 } 4604 /* last check */ 4605 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4606 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4607 reuse = MAT_INITIAL_MATRIX; 4608 } 4609 } else { /* first time, so we need to create the matrix */ 4610 reuse = MAT_INITIAL_MATRIX; 4611 } 4612 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4613 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4614 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4615 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4616 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4617 if (matis->A == pcbddc->local_mat) { 4618 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4619 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4620 } else { 4621 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4622 } 4623 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4624 if (matis->A == pcbddc->local_mat) { 4625 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4626 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4627 } else { 4628 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4629 } 4630 } 4631 /* extract A_RR */ 4632 if (sub_schurs && sub_schurs->reuse_solver) { 4633 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4634 4635 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4636 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4637 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4638 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4639 } else { 4640 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4641 } 4642 } else { 4643 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4644 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4645 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4646 } 4647 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4648 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4649 } 4650 if (pcbddc->local_mat->symmetric_set) { 4651 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4652 } 4653 if (!pcbddc->ksp_R) { /* create object if not present */ 4654 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4655 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4656 /* default */ 4657 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4658 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4659 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4660 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4661 if (issbaij) { 4662 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4663 } else { 4664 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4665 } 4666 /* Allow user's customization */ 4667 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4668 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4669 } 4670 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4671 if (!n_R) { 4672 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4673 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4674 } 4675 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4676 /* Reuse solver if it is present */ 4677 if (sub_schurs && sub_schurs->reuse_solver) { 4678 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4679 4680 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4681 } 4682 /* Set Up KSP for Neumann problem of BDDC */ 4683 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4684 } 4685 4686 if (pcbddc->dbg_flag) { 4687 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4688 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4689 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4690 } 4691 4692 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4693 check_corr[0] = check_corr[1] = PETSC_FALSE; 4694 if (pcbddc->NullSpace_corr[0]) { 4695 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4696 } 4697 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4698 check_corr[0] = PETSC_TRUE; 4699 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4700 } 4701 if (neumann && pcbddc->NullSpace_corr[2]) { 4702 check_corr[1] = PETSC_TRUE; 4703 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4704 } 4705 4706 /* check Dirichlet and Neumann solvers */ 4707 if (pcbddc->dbg_flag) { 4708 if (dirichlet) { /* Dirichlet */ 4709 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4710 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4711 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4712 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4713 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4714 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4715 if (check_corr[0]) { 4716 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4717 } 4718 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4719 } 4720 if (neumann) { /* Neumann */ 4721 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4722 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4723 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4724 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4725 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4726 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4727 if (check_corr[1]) { 4728 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4729 } 4730 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4731 } 4732 } 4733 /* free Neumann problem's matrix */ 4734 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4735 PetscFunctionReturn(0); 4736 } 4737 4738 #undef __FUNCT__ 4739 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 4740 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4741 { 4742 PetscErrorCode ierr; 4743 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4744 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4745 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4746 4747 PetscFunctionBegin; 4748 if (!reuse_solver) { 4749 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4750 } 4751 if (!pcbddc->switch_static) { 4752 if (applytranspose && pcbddc->local_auxmat1) { 4753 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4754 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4755 } 4756 if (!reuse_solver) { 4757 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4758 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4759 } else { 4760 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4761 4762 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4763 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4764 } 4765 } else { 4766 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4767 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4768 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4769 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4770 if (applytranspose && pcbddc->local_auxmat1) { 4771 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4772 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4773 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4774 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4775 } 4776 } 4777 if (!reuse_solver || pcbddc->switch_static) { 4778 if (applytranspose) { 4779 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4780 } else { 4781 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4782 } 4783 } else { 4784 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4785 4786 if (applytranspose) { 4787 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4788 } else { 4789 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4790 } 4791 } 4792 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4793 if (!pcbddc->switch_static) { 4794 if (!reuse_solver) { 4795 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4796 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4797 } else { 4798 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4799 4800 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4801 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4802 } 4803 if (!applytranspose && pcbddc->local_auxmat1) { 4804 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4805 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4806 } 4807 } else { 4808 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4809 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4810 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4811 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4812 if (!applytranspose && pcbddc->local_auxmat1) { 4813 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4814 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4815 } 4816 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4817 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4818 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4819 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4820 } 4821 PetscFunctionReturn(0); 4822 } 4823 4824 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 4825 #undef __FUNCT__ 4826 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 4827 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 4828 { 4829 PetscErrorCode ierr; 4830 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4831 PC_IS* pcis = (PC_IS*) (pc->data); 4832 const PetscScalar zero = 0.0; 4833 4834 PetscFunctionBegin; 4835 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 4836 if (!pcbddc->benign_apply_coarse_only) { 4837 if (applytranspose) { 4838 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4839 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4840 } else { 4841 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4842 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4843 } 4844 } else { 4845 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 4846 } 4847 4848 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 4849 if (pcbddc->benign_n) { 4850 PetscScalar *array; 4851 PetscInt j; 4852 4853 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4854 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 4855 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4856 } 4857 4858 /* start communications from local primal nodes to rhs of coarse solver */ 4859 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 4860 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4861 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4862 4863 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 4864 if (pcbddc->coarse_ksp) { 4865 Mat coarse_mat; 4866 Vec rhs,sol; 4867 MatNullSpace nullsp; 4868 PetscBool isbddc = PETSC_FALSE; 4869 4870 if (pcbddc->benign_have_null) { 4871 PC coarse_pc; 4872 4873 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4874 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4875 /* we need to propagate to coarser levels the need for a possible benign correction */ 4876 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 4877 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4878 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 4879 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 4880 } 4881 } 4882 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 4883 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 4884 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4885 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 4886 if (nullsp) { 4887 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 4888 } 4889 if (applytranspose) { 4890 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 4891 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4892 } else { 4893 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 4894 PC coarse_pc; 4895 4896 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4897 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4898 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 4899 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4900 } else { 4901 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4902 } 4903 } 4904 /* we don't need the benign correction at coarser levels anymore */ 4905 if (pcbddc->benign_have_null && isbddc) { 4906 PC coarse_pc; 4907 PC_BDDC* coarsepcbddc; 4908 4909 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4910 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4911 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 4912 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 4913 } 4914 if (nullsp) { 4915 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 4916 } 4917 } 4918 4919 /* Local solution on R nodes */ 4920 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 4921 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 4922 } 4923 /* communications from coarse sol to local primal nodes */ 4924 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4925 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4926 4927 /* Sum contributions from the two levels */ 4928 if (!pcbddc->benign_apply_coarse_only) { 4929 if (applytranspose) { 4930 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4931 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4932 } else { 4933 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4934 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4935 } 4936 /* store p0 */ 4937 if (pcbddc->benign_n) { 4938 PetscScalar *array; 4939 PetscInt j; 4940 4941 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4942 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 4943 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4944 } 4945 } else { /* expand the coarse solution */ 4946 if (applytranspose) { 4947 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4948 } else { 4949 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4950 } 4951 } 4952 PetscFunctionReturn(0); 4953 } 4954 4955 #undef __FUNCT__ 4956 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 4957 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 4958 { 4959 PetscErrorCode ierr; 4960 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4961 PetscScalar *array; 4962 Vec from,to; 4963 4964 PetscFunctionBegin; 4965 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4966 from = pcbddc->coarse_vec; 4967 to = pcbddc->vec1_P; 4968 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4969 Vec tvec; 4970 4971 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4972 ierr = VecResetArray(tvec);CHKERRQ(ierr); 4973 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4974 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 4975 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 4976 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 4977 } 4978 } else { /* from local to global -> put data in coarse right hand side */ 4979 from = pcbddc->vec1_P; 4980 to = pcbddc->coarse_vec; 4981 } 4982 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4983 PetscFunctionReturn(0); 4984 } 4985 4986 #undef __FUNCT__ 4987 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 4988 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 4989 { 4990 PetscErrorCode ierr; 4991 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4992 PetscScalar *array; 4993 Vec from,to; 4994 4995 PetscFunctionBegin; 4996 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4997 from = pcbddc->coarse_vec; 4998 to = pcbddc->vec1_P; 4999 } else { /* from local to global -> put data in coarse right hand side */ 5000 from = pcbddc->vec1_P; 5001 to = pcbddc->coarse_vec; 5002 } 5003 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5004 if (smode == SCATTER_FORWARD) { 5005 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5006 Vec tvec; 5007 5008 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5009 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5010 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5011 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5012 } 5013 } else { 5014 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5015 ierr = VecResetArray(from);CHKERRQ(ierr); 5016 } 5017 } 5018 PetscFunctionReturn(0); 5019 } 5020 5021 /* uncomment for testing purposes */ 5022 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5023 #undef __FUNCT__ 5024 #define __FUNCT__ "PCBDDCConstraintsSetUp" 5025 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5026 { 5027 PetscErrorCode ierr; 5028 PC_IS* pcis = (PC_IS*)(pc->data); 5029 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5030 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5031 /* one and zero */ 5032 PetscScalar one=1.0,zero=0.0; 5033 /* space to store constraints and their local indices */ 5034 PetscScalar *constraints_data; 5035 PetscInt *constraints_idxs,*constraints_idxs_B; 5036 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5037 PetscInt *constraints_n; 5038 /* iterators */ 5039 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5040 /* BLAS integers */ 5041 PetscBLASInt lwork,lierr; 5042 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5043 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5044 /* reuse */ 5045 PetscInt olocal_primal_size,olocal_primal_size_cc; 5046 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5047 /* change of basis */ 5048 PetscBool qr_needed; 5049 PetscBT change_basis,qr_needed_idx; 5050 /* auxiliary stuff */ 5051 PetscInt *nnz,*is_indices; 5052 PetscInt ncc; 5053 /* some quantities */ 5054 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5055 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5056 5057 PetscFunctionBegin; 5058 /* Destroy Mat objects computed previously */ 5059 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5060 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5061 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5062 /* save info on constraints from previous setup (if any) */ 5063 olocal_primal_size = pcbddc->local_primal_size; 5064 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5065 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5066 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5067 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5068 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5069 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5070 5071 if (!pcbddc->adaptive_selection) { 5072 IS ISForVertices,*ISForFaces,*ISForEdges; 5073 MatNullSpace nearnullsp; 5074 const Vec *nearnullvecs; 5075 Vec *localnearnullsp; 5076 PetscScalar *array; 5077 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5078 PetscBool nnsp_has_cnst; 5079 /* LAPACK working arrays for SVD or POD */ 5080 PetscBool skip_lapack,boolforchange; 5081 PetscScalar *work; 5082 PetscReal *singular_vals; 5083 #if defined(PETSC_USE_COMPLEX) 5084 PetscReal *rwork; 5085 #endif 5086 #if defined(PETSC_MISSING_LAPACK_GESVD) 5087 PetscScalar *temp_basis,*correlation_mat; 5088 #else 5089 PetscBLASInt dummy_int=1; 5090 PetscScalar dummy_scalar=1.; 5091 #endif 5092 5093 /* Get index sets for faces, edges and vertices from graph */ 5094 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5095 /* print some info */ 5096 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5097 PetscInt nv; 5098 5099 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5100 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5101 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5102 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5103 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5104 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5105 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5106 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5107 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5108 } 5109 5110 /* free unneeded index sets */ 5111 if (!pcbddc->use_vertices) { 5112 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5113 } 5114 if (!pcbddc->use_edges) { 5115 for (i=0;i<n_ISForEdges;i++) { 5116 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5117 } 5118 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5119 n_ISForEdges = 0; 5120 } 5121 if (!pcbddc->use_faces) { 5122 for (i=0;i<n_ISForFaces;i++) { 5123 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5124 } 5125 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5126 n_ISForFaces = 0; 5127 } 5128 5129 /* check if near null space is attached to global mat */ 5130 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5131 if (nearnullsp) { 5132 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5133 /* remove any stored info */ 5134 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5135 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5136 /* store information for BDDC solver reuse */ 5137 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5138 pcbddc->onearnullspace = nearnullsp; 5139 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5140 for (i=0;i<nnsp_size;i++) { 5141 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5142 } 5143 } else { /* if near null space is not provided BDDC uses constants by default */ 5144 nnsp_size = 0; 5145 nnsp_has_cnst = PETSC_TRUE; 5146 } 5147 /* get max number of constraints on a single cc */ 5148 max_constraints = nnsp_size; 5149 if (nnsp_has_cnst) max_constraints++; 5150 5151 /* 5152 Evaluate maximum storage size needed by the procedure 5153 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5154 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5155 There can be multiple constraints per connected component 5156 */ 5157 n_vertices = 0; 5158 if (ISForVertices) { 5159 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5160 } 5161 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5162 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5163 5164 total_counts = n_ISForFaces+n_ISForEdges; 5165 total_counts *= max_constraints; 5166 total_counts += n_vertices; 5167 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5168 5169 total_counts = 0; 5170 max_size_of_constraint = 0; 5171 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5172 IS used_is; 5173 if (i<n_ISForEdges) { 5174 used_is = ISForEdges[i]; 5175 } else { 5176 used_is = ISForFaces[i-n_ISForEdges]; 5177 } 5178 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5179 total_counts += j; 5180 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5181 } 5182 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5183 5184 /* get local part of global near null space vectors */ 5185 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5186 for (k=0;k<nnsp_size;k++) { 5187 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5188 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5189 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5190 } 5191 5192 /* whether or not to skip lapack calls */ 5193 skip_lapack = PETSC_TRUE; 5194 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5195 5196 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5197 if (!skip_lapack) { 5198 PetscScalar temp_work; 5199 5200 #if defined(PETSC_MISSING_LAPACK_GESVD) 5201 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5202 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5203 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5204 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5205 #if defined(PETSC_USE_COMPLEX) 5206 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5207 #endif 5208 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5209 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5210 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5211 lwork = -1; 5212 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5213 #if !defined(PETSC_USE_COMPLEX) 5214 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5215 #else 5216 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5217 #endif 5218 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5219 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5220 #else /* on missing GESVD */ 5221 /* SVD */ 5222 PetscInt max_n,min_n; 5223 max_n = max_size_of_constraint; 5224 min_n = max_constraints; 5225 if (max_size_of_constraint < max_constraints) { 5226 min_n = max_size_of_constraint; 5227 max_n = max_constraints; 5228 } 5229 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5230 #if defined(PETSC_USE_COMPLEX) 5231 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5232 #endif 5233 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5234 lwork = -1; 5235 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5236 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5237 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5238 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5239 #if !defined(PETSC_USE_COMPLEX) 5240 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5241 #else 5242 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5243 #endif 5244 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5245 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5246 #endif /* on missing GESVD */ 5247 /* Allocate optimal workspace */ 5248 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5249 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5250 } 5251 /* Now we can loop on constraining sets */ 5252 total_counts = 0; 5253 constraints_idxs_ptr[0] = 0; 5254 constraints_data_ptr[0] = 0; 5255 /* vertices */ 5256 if (n_vertices) { 5257 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5258 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5259 for (i=0;i<n_vertices;i++) { 5260 constraints_n[total_counts] = 1; 5261 constraints_data[total_counts] = 1.0; 5262 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5263 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5264 total_counts++; 5265 } 5266 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5267 n_vertices = total_counts; 5268 } 5269 5270 /* edges and faces */ 5271 total_counts_cc = total_counts; 5272 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5273 IS used_is; 5274 PetscBool idxs_copied = PETSC_FALSE; 5275 5276 if (ncc<n_ISForEdges) { 5277 used_is = ISForEdges[ncc]; 5278 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5279 } else { 5280 used_is = ISForFaces[ncc-n_ISForEdges]; 5281 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5282 } 5283 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5284 5285 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5286 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5287 /* change of basis should not be performed on local periodic nodes */ 5288 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5289 if (nnsp_has_cnst) { 5290 PetscScalar quad_value; 5291 5292 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5293 idxs_copied = PETSC_TRUE; 5294 5295 if (!pcbddc->use_nnsp_true) { 5296 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5297 } else { 5298 quad_value = 1.0; 5299 } 5300 for (j=0;j<size_of_constraint;j++) { 5301 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5302 } 5303 temp_constraints++; 5304 total_counts++; 5305 } 5306 for (k=0;k<nnsp_size;k++) { 5307 PetscReal real_value; 5308 PetscScalar *ptr_to_data; 5309 5310 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5311 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5312 for (j=0;j<size_of_constraint;j++) { 5313 ptr_to_data[j] = array[is_indices[j]]; 5314 } 5315 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5316 /* check if array is null on the connected component */ 5317 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5318 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5319 if (real_value > 0.0) { /* keep indices and values */ 5320 temp_constraints++; 5321 total_counts++; 5322 if (!idxs_copied) { 5323 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5324 idxs_copied = PETSC_TRUE; 5325 } 5326 } 5327 } 5328 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5329 valid_constraints = temp_constraints; 5330 if (!pcbddc->use_nnsp_true && temp_constraints) { 5331 if (temp_constraints == 1) { /* just normalize the constraint */ 5332 PetscScalar norm,*ptr_to_data; 5333 5334 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5335 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5336 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5337 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5338 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5339 } else { /* perform SVD */ 5340 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5341 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5342 5343 #if defined(PETSC_MISSING_LAPACK_GESVD) 5344 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5345 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5346 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5347 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5348 from that computed using LAPACKgesvd 5349 -> This is due to a different computation of eigenvectors in LAPACKheev 5350 -> The quality of the POD-computed basis will be the same */ 5351 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5352 /* Store upper triangular part of correlation matrix */ 5353 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5354 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5355 for (j=0;j<temp_constraints;j++) { 5356 for (k=0;k<j+1;k++) { 5357 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5358 } 5359 } 5360 /* compute eigenvalues and eigenvectors of correlation matrix */ 5361 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5362 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5363 #if !defined(PETSC_USE_COMPLEX) 5364 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5365 #else 5366 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5367 #endif 5368 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5369 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5370 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5371 j = 0; 5372 while (j < temp_constraints && singular_vals[j] < tol) j++; 5373 total_counts = total_counts-j; 5374 valid_constraints = temp_constraints-j; 5375 /* scale and copy POD basis into used quadrature memory */ 5376 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5377 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5378 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5379 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5380 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5381 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5382 if (j<temp_constraints) { 5383 PetscInt ii; 5384 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5385 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5386 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5387 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5388 for (k=0;k<temp_constraints-j;k++) { 5389 for (ii=0;ii<size_of_constraint;ii++) { 5390 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5391 } 5392 } 5393 } 5394 #else /* on missing GESVD */ 5395 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5396 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5397 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5398 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5399 #if !defined(PETSC_USE_COMPLEX) 5400 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5401 #else 5402 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5403 #endif 5404 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5405 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5406 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5407 k = temp_constraints; 5408 if (k > size_of_constraint) k = size_of_constraint; 5409 j = 0; 5410 while (j < k && singular_vals[k-j-1] < tol) j++; 5411 valid_constraints = k-j; 5412 total_counts = total_counts-temp_constraints+valid_constraints; 5413 #endif /* on missing GESVD */ 5414 } 5415 } 5416 /* update pointers information */ 5417 if (valid_constraints) { 5418 constraints_n[total_counts_cc] = valid_constraints; 5419 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5420 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5421 /* set change_of_basis flag */ 5422 if (boolforchange) { 5423 PetscBTSet(change_basis,total_counts_cc); 5424 } 5425 total_counts_cc++; 5426 } 5427 } 5428 /* free workspace */ 5429 if (!skip_lapack) { 5430 ierr = PetscFree(work);CHKERRQ(ierr); 5431 #if defined(PETSC_USE_COMPLEX) 5432 ierr = PetscFree(rwork);CHKERRQ(ierr); 5433 #endif 5434 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5435 #if defined(PETSC_MISSING_LAPACK_GESVD) 5436 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5437 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5438 #endif 5439 } 5440 for (k=0;k<nnsp_size;k++) { 5441 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5442 } 5443 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5444 /* free index sets of faces, edges and vertices */ 5445 for (i=0;i<n_ISForFaces;i++) { 5446 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5447 } 5448 if (n_ISForFaces) { 5449 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5450 } 5451 for (i=0;i<n_ISForEdges;i++) { 5452 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5453 } 5454 if (n_ISForEdges) { 5455 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5456 } 5457 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5458 } else { 5459 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5460 5461 total_counts = 0; 5462 n_vertices = 0; 5463 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5464 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5465 } 5466 max_constraints = 0; 5467 total_counts_cc = 0; 5468 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5469 total_counts += pcbddc->adaptive_constraints_n[i]; 5470 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5471 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5472 } 5473 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5474 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5475 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5476 constraints_data = pcbddc->adaptive_constraints_data; 5477 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5478 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5479 total_counts_cc = 0; 5480 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5481 if (pcbddc->adaptive_constraints_n[i]) { 5482 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5483 } 5484 } 5485 #if 0 5486 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5487 for (i=0;i<total_counts_cc;i++) { 5488 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5489 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5490 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5491 printf(" %d",constraints_idxs[j]); 5492 } 5493 printf("\n"); 5494 printf("number of cc: %d\n",constraints_n[i]); 5495 } 5496 for (i=0;i<n_vertices;i++) { 5497 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5498 } 5499 for (i=0;i<sub_schurs->n_subs;i++) { 5500 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5501 } 5502 #endif 5503 5504 max_size_of_constraint = 0; 5505 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5506 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5507 /* Change of basis */ 5508 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5509 if (pcbddc->use_change_of_basis) { 5510 for (i=0;i<sub_schurs->n_subs;i++) { 5511 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5512 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5513 } 5514 } 5515 } 5516 } 5517 pcbddc->local_primal_size = total_counts; 5518 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5519 5520 /* map constraints_idxs in boundary numbering */ 5521 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5522 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5523 5524 /* Create constraint matrix */ 5525 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5526 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5527 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5528 5529 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5530 /* determine if a QR strategy is needed for change of basis */ 5531 qr_needed = PETSC_FALSE; 5532 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5533 total_primal_vertices=0; 5534 pcbddc->local_primal_size_cc = 0; 5535 for (i=0;i<total_counts_cc;i++) { 5536 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5537 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5538 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5539 pcbddc->local_primal_size_cc += 1; 5540 } else if (PetscBTLookup(change_basis,i)) { 5541 for (k=0;k<constraints_n[i];k++) { 5542 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5543 } 5544 pcbddc->local_primal_size_cc += constraints_n[i]; 5545 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5546 PetscBTSet(qr_needed_idx,i); 5547 qr_needed = PETSC_TRUE; 5548 } 5549 } else { 5550 pcbddc->local_primal_size_cc += 1; 5551 } 5552 } 5553 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5554 pcbddc->n_vertices = total_primal_vertices; 5555 /* permute indices in order to have a sorted set of vertices */ 5556 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5557 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5558 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5559 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5560 5561 /* nonzero structure of constraint matrix */ 5562 /* and get reference dof for local constraints */ 5563 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5564 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5565 5566 j = total_primal_vertices; 5567 total_counts = total_primal_vertices; 5568 cum = total_primal_vertices; 5569 for (i=n_vertices;i<total_counts_cc;i++) { 5570 if (!PetscBTLookup(change_basis,i)) { 5571 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5572 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5573 cum++; 5574 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5575 for (k=0;k<constraints_n[i];k++) { 5576 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5577 nnz[j+k] = size_of_constraint; 5578 } 5579 j += constraints_n[i]; 5580 } 5581 } 5582 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5583 ierr = PetscFree(nnz);CHKERRQ(ierr); 5584 5585 /* set values in constraint matrix */ 5586 for (i=0;i<total_primal_vertices;i++) { 5587 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5588 } 5589 total_counts = total_primal_vertices; 5590 for (i=n_vertices;i<total_counts_cc;i++) { 5591 if (!PetscBTLookup(change_basis,i)) { 5592 PetscInt *cols; 5593 5594 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5595 cols = constraints_idxs+constraints_idxs_ptr[i]; 5596 for (k=0;k<constraints_n[i];k++) { 5597 PetscInt row = total_counts+k; 5598 PetscScalar *vals; 5599 5600 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5601 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5602 } 5603 total_counts += constraints_n[i]; 5604 } 5605 } 5606 /* assembling */ 5607 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5608 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5609 5610 /* 5611 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5612 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5613 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5614 */ 5615 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5616 if (pcbddc->use_change_of_basis) { 5617 /* dual and primal dofs on a single cc */ 5618 PetscInt dual_dofs,primal_dofs; 5619 /* working stuff for GEQRF */ 5620 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5621 PetscBLASInt lqr_work; 5622 /* working stuff for UNGQR */ 5623 PetscScalar *gqr_work,lgqr_work_t; 5624 PetscBLASInt lgqr_work; 5625 /* working stuff for TRTRS */ 5626 PetscScalar *trs_rhs; 5627 PetscBLASInt Blas_NRHS; 5628 /* pointers for values insertion into change of basis matrix */ 5629 PetscInt *start_rows,*start_cols; 5630 PetscScalar *start_vals; 5631 /* working stuff for values insertion */ 5632 PetscBT is_primal; 5633 PetscInt *aux_primal_numbering_B; 5634 /* matrix sizes */ 5635 PetscInt global_size,local_size; 5636 /* temporary change of basis */ 5637 Mat localChangeOfBasisMatrix; 5638 /* extra space for debugging */ 5639 PetscScalar *dbg_work; 5640 5641 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5642 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5643 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5644 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5645 /* nonzeros for local mat */ 5646 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5647 if (!pcbddc->benign_change || pcbddc->fake_change) { 5648 for (i=0;i<pcis->n;i++) nnz[i]=1; 5649 } else { 5650 const PetscInt *ii; 5651 PetscInt n; 5652 PetscBool flg_row; 5653 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5654 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5655 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5656 } 5657 for (i=n_vertices;i<total_counts_cc;i++) { 5658 if (PetscBTLookup(change_basis,i)) { 5659 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5660 if (PetscBTLookup(qr_needed_idx,i)) { 5661 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5662 } else { 5663 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5664 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5665 } 5666 } 5667 } 5668 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5669 ierr = PetscFree(nnz);CHKERRQ(ierr); 5670 /* Set interior change in the matrix */ 5671 if (!pcbddc->benign_change || pcbddc->fake_change) { 5672 for (i=0;i<pcis->n;i++) { 5673 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5674 } 5675 } else { 5676 const PetscInt *ii,*jj; 5677 PetscScalar *aa; 5678 PetscInt n; 5679 PetscBool flg_row; 5680 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5681 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5682 for (i=0;i<n;i++) { 5683 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5684 } 5685 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5686 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5687 } 5688 5689 if (pcbddc->dbg_flag) { 5690 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5692 } 5693 5694 5695 /* Now we loop on the constraints which need a change of basis */ 5696 /* 5697 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5698 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5699 5700 Basic blocks of change of basis matrix T computed by 5701 5702 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5703 5704 | 1 0 ... 0 s_1/S | 5705 | 0 1 ... 0 s_2/S | 5706 | ... | 5707 | 0 ... 1 s_{n-1}/S | 5708 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5709 5710 with S = \sum_{i=1}^n s_i^2 5711 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5712 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5713 5714 - QR decomposition of constraints otherwise 5715 */ 5716 if (qr_needed) { 5717 /* space to store Q */ 5718 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5719 /* array to store scaling factors for reflectors */ 5720 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5721 /* first we issue queries for optimal work */ 5722 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5723 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5724 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5725 lqr_work = -1; 5726 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5727 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5728 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5729 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5730 lgqr_work = -1; 5731 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5732 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5733 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5734 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5735 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5736 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5737 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5738 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5739 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5740 /* array to store rhs and solution of triangular solver */ 5741 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5742 /* allocating workspace for check */ 5743 if (pcbddc->dbg_flag) { 5744 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5745 } 5746 } 5747 /* array to store whether a node is primal or not */ 5748 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5749 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5750 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5751 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5752 for (i=0;i<total_primal_vertices;i++) { 5753 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5754 } 5755 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5756 5757 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5758 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5759 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5760 if (PetscBTLookup(change_basis,total_counts)) { 5761 /* get constraint info */ 5762 primal_dofs = constraints_n[total_counts]; 5763 dual_dofs = size_of_constraint-primal_dofs; 5764 5765 if (pcbddc->dbg_flag) { 5766 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5767 } 5768 5769 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5770 5771 /* copy quadrature constraints for change of basis check */ 5772 if (pcbddc->dbg_flag) { 5773 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5774 } 5775 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5776 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5777 5778 /* compute QR decomposition of constraints */ 5779 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5780 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5781 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5782 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5783 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5784 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5785 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5786 5787 /* explictly compute R^-T */ 5788 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5789 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5790 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5791 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5792 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5793 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5794 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5795 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5796 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5797 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5798 5799 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5800 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5801 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5802 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5803 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5804 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5805 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 5806 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 5807 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5808 5809 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 5810 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 5811 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 5812 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5813 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5814 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5815 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5816 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5817 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5818 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5819 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 5820 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5821 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5822 5823 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 5824 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 5825 /* insert cols for primal dofs */ 5826 for (j=0;j<primal_dofs;j++) { 5827 start_vals = &qr_basis[j*size_of_constraint]; 5828 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5829 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5830 } 5831 /* insert cols for dual dofs */ 5832 for (j=0,k=0;j<dual_dofs;k++) { 5833 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 5834 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 5835 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5836 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5837 j++; 5838 } 5839 } 5840 5841 /* check change of basis */ 5842 if (pcbddc->dbg_flag) { 5843 PetscInt ii,jj; 5844 PetscBool valid_qr=PETSC_TRUE; 5845 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 5846 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5847 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 5848 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5849 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 5850 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 5851 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5852 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 5853 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5854 for (jj=0;jj<size_of_constraint;jj++) { 5855 for (ii=0;ii<primal_dofs;ii++) { 5856 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 5857 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 5858 } 5859 } 5860 if (!valid_qr) { 5861 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 5862 for (jj=0;jj<size_of_constraint;jj++) { 5863 for (ii=0;ii<primal_dofs;ii++) { 5864 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 5865 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5866 } 5867 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 5868 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5869 } 5870 } 5871 } 5872 } else { 5873 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 5874 } 5875 } 5876 } else { /* simple transformation block */ 5877 PetscInt row,col; 5878 PetscScalar val,norm; 5879 5880 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5881 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 5882 for (j=0;j<size_of_constraint;j++) { 5883 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 5884 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5885 if (!PetscBTLookup(is_primal,row_B)) { 5886 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 5887 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 5888 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 5889 } else { 5890 for (k=0;k<size_of_constraint;k++) { 5891 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5892 if (row != col) { 5893 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 5894 } else { 5895 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 5896 } 5897 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 5898 } 5899 } 5900 } 5901 if (pcbddc->dbg_flag) { 5902 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 5903 } 5904 } 5905 } else { 5906 if (pcbddc->dbg_flag) { 5907 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 5908 } 5909 } 5910 } 5911 5912 /* free workspace */ 5913 if (qr_needed) { 5914 if (pcbddc->dbg_flag) { 5915 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 5916 } 5917 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 5918 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 5919 ierr = PetscFree(qr_work);CHKERRQ(ierr); 5920 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 5921 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 5922 } 5923 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 5924 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5925 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5926 5927 /* assembling of global change of variable */ 5928 if (!pcbddc->fake_change) { 5929 Mat tmat; 5930 PetscInt bs; 5931 5932 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 5933 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 5934 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5935 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 5936 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5937 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5938 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 5939 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 5940 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 5941 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 5942 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5943 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5944 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5945 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5946 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5947 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5948 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5949 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 5950 5951 /* check */ 5952 if (pcbddc->dbg_flag) { 5953 PetscReal error; 5954 Vec x,x_change; 5955 5956 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 5957 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 5958 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 5959 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 5960 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5961 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5962 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 5963 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5964 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5965 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 5966 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 5967 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 5968 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5969 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 5970 ierr = VecDestroy(&x);CHKERRQ(ierr); 5971 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5972 } 5973 /* adapt sub_schurs computed (if any) */ 5974 if (pcbddc->use_deluxe_scaling) { 5975 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5976 5977 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 5978 if (sub_schurs && sub_schurs->S_Ej_all) { 5979 Mat S_new,tmat; 5980 IS is_all_N,is_V_Sall = NULL; 5981 5982 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 5983 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 5984 if (pcbddc->deluxe_zerorows) { 5985 ISLocalToGlobalMapping NtoSall; 5986 IS is_V; 5987 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 5988 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 5989 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 5990 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 5991 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 5992 } 5993 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 5994 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5995 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 5996 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5997 if (pcbddc->deluxe_zerorows) { 5998 const PetscScalar *array; 5999 const PetscInt *idxs_V,*idxs_all; 6000 PetscInt i,n_V; 6001 6002 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6003 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6004 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6005 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6006 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6007 for (i=0;i<n_V;i++) { 6008 PetscScalar val; 6009 PetscInt idx; 6010 6011 idx = idxs_V[i]; 6012 val = array[idxs_all[idxs_V[i]]]; 6013 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6014 } 6015 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6016 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6017 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6018 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6019 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6020 } 6021 sub_schurs->S_Ej_all = S_new; 6022 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6023 if (sub_schurs->sum_S_Ej_all) { 6024 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6025 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6026 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6027 if (pcbddc->deluxe_zerorows) { 6028 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6029 } 6030 sub_schurs->sum_S_Ej_all = S_new; 6031 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6032 } 6033 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6034 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6035 } 6036 /* destroy any change of basis context in sub_schurs */ 6037 if (sub_schurs && sub_schurs->change) { 6038 PetscInt i; 6039 6040 for (i=0;i<sub_schurs->n_subs;i++) { 6041 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6042 } 6043 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6044 } 6045 } 6046 if (pcbddc->switch_static) { /* need to save the local change */ 6047 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6048 } else { 6049 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6050 } 6051 /* determine if any process has changed the pressures locally */ 6052 pcbddc->change_interior = pcbddc->benign_have_null; 6053 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6054 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6055 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6056 pcbddc->use_qr_single = qr_needed; 6057 } 6058 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6059 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6060 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6061 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6062 } else { 6063 Mat benign_global = NULL; 6064 if (pcbddc->benign_have_null) { 6065 Mat tmat; 6066 6067 pcbddc->change_interior = PETSC_TRUE; 6068 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6069 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6070 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6071 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6072 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6073 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6074 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6075 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6076 if (pcbddc->benign_change) { 6077 Mat M; 6078 6079 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6080 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6081 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6082 ierr = MatDestroy(&M);CHKERRQ(ierr); 6083 } else { 6084 Mat eye; 6085 PetscScalar *array; 6086 6087 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6088 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6089 for (i=0;i<pcis->n;i++) { 6090 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6091 } 6092 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6093 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6094 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6095 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6096 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6097 } 6098 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6099 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6100 } 6101 if (pcbddc->user_ChangeOfBasisMatrix) { 6102 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6103 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6104 } else if (pcbddc->benign_have_null) { 6105 pcbddc->ChangeOfBasisMatrix = benign_global; 6106 } 6107 } 6108 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6109 IS is_global; 6110 const PetscInt *gidxs; 6111 6112 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6113 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6114 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6115 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6116 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6117 } 6118 } 6119 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6120 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6121 } 6122 6123 if (!pcbddc->fake_change) { 6124 /* add pressure dofs to set of primal nodes for numbering purposes */ 6125 for (i=0;i<pcbddc->benign_n;i++) { 6126 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6127 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6128 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6129 pcbddc->local_primal_size_cc++; 6130 pcbddc->local_primal_size++; 6131 } 6132 6133 /* check if a new primal space has been introduced (also take into account benign trick) */ 6134 pcbddc->new_primal_space_local = PETSC_TRUE; 6135 if (olocal_primal_size == pcbddc->local_primal_size) { 6136 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6137 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6138 if (!pcbddc->new_primal_space_local) { 6139 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6140 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6141 } 6142 } 6143 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6144 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6145 } 6146 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6147 6148 /* flush dbg viewer */ 6149 if (pcbddc->dbg_flag) { 6150 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6151 } 6152 6153 /* free workspace */ 6154 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6155 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6156 if (!pcbddc->adaptive_selection) { 6157 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6158 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6159 } else { 6160 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6161 pcbddc->adaptive_constraints_idxs_ptr, 6162 pcbddc->adaptive_constraints_data_ptr, 6163 pcbddc->adaptive_constraints_idxs, 6164 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6165 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6166 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6167 } 6168 PetscFunctionReturn(0); 6169 } 6170 6171 #undef __FUNCT__ 6172 #define __FUNCT__ "PCBDDCAnalyzeInterface" 6173 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6174 { 6175 ISLocalToGlobalMapping map; 6176 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6177 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6178 PetscInt ierr,i,N; 6179 6180 PetscFunctionBegin; 6181 if (pcbddc->recompute_topography) { 6182 pcbddc->graphanalyzed = PETSC_FALSE; 6183 /* Reset previously computed graph */ 6184 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6185 /* Init local Graph struct */ 6186 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6187 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6188 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6189 6190 /* Check validity of the csr graph passed in by the user */ 6191 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6192 6193 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6194 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 6195 PetscInt *xadj,*adjncy; 6196 PetscInt nvtxs; 6197 PetscBool flg_row=PETSC_FALSE; 6198 6199 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6200 if (flg_row) { 6201 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6202 pcbddc->computed_rowadj = PETSC_TRUE; 6203 } 6204 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6205 } 6206 if (pcbddc->dbg_flag) { 6207 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6208 } 6209 6210 /* Setup of Graph */ 6211 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6212 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6213 6214 /* attach info on disconnected subdomains if present */ 6215 if (pcbddc->n_local_subs) { 6216 PetscInt *local_subs; 6217 6218 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6219 for (i=0;i<pcbddc->n_local_subs;i++) { 6220 const PetscInt *idxs; 6221 PetscInt nl,j; 6222 6223 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6224 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6225 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6226 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6227 } 6228 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6229 pcbddc->mat_graph->local_subs = local_subs; 6230 } 6231 } 6232 6233 if (!pcbddc->graphanalyzed) { 6234 /* Graph's connected components analysis */ 6235 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6236 pcbddc->graphanalyzed = PETSC_TRUE; 6237 } 6238 PetscFunctionReturn(0); 6239 } 6240 6241 #undef __FUNCT__ 6242 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 6243 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6244 { 6245 PetscInt i,j; 6246 PetscScalar *alphas; 6247 PetscErrorCode ierr; 6248 6249 PetscFunctionBegin; 6250 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6251 for (i=0;i<n;i++) { 6252 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6253 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6254 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6255 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6256 } 6257 ierr = PetscFree(alphas);CHKERRQ(ierr); 6258 PetscFunctionReturn(0); 6259 } 6260 6261 #undef __FUNCT__ 6262 #define __FUNCT__ "MatISGetSubassemblingPattern" 6263 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6264 { 6265 Mat A; 6266 PetscInt n_neighs,*neighs,*n_shared,**shared; 6267 PetscMPIInt size,rank,color; 6268 PetscInt *xadj,*adjncy; 6269 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6270 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 6271 PetscInt void_procs,*procs_candidates = NULL; 6272 PetscInt xadj_count, *count; 6273 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6274 PetscSubcomm psubcomm; 6275 MPI_Comm subcomm; 6276 PetscErrorCode ierr; 6277 6278 PetscFunctionBegin; 6279 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6280 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6281 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6282 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6283 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6284 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6285 6286 if (have_void) *have_void = PETSC_FALSE; 6287 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6288 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6289 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6290 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6291 im_active = !!(n); 6292 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6293 void_procs = size - active_procs; 6294 /* get ranks of of non-active processes in mat communicator */ 6295 if (void_procs) { 6296 PetscInt ncand; 6297 6298 if (have_void) *have_void = PETSC_TRUE; 6299 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6300 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6301 for (i=0,ncand=0;i<size;i++) { 6302 if (!procs_candidates[i]) { 6303 procs_candidates[ncand++] = i; 6304 } 6305 } 6306 /* force n_subdomains to be not greater that the number of non-active processes */ 6307 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6308 } 6309 6310 /* number of subdomains requested greater than active processes -> just shift the matrix 6311 number of subdomains requested 1 -> send to master or first candidate in voids */ 6312 if (active_procs < *n_subdomains || *n_subdomains == 1) { 6313 PetscInt issize,isidx,dest; 6314 if (*n_subdomains == 1) dest = 0; 6315 else dest = rank; 6316 if (im_active) { 6317 issize = 1; 6318 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6319 isidx = procs_candidates[dest]; 6320 } else { 6321 isidx = dest; 6322 } 6323 } else { 6324 issize = 0; 6325 isidx = -1; 6326 } 6327 *n_subdomains = active_procs; 6328 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6329 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6330 PetscFunctionReturn(0); 6331 } 6332 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6333 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6334 threshold = PetscMax(threshold,2); 6335 6336 /* Get info on mapping */ 6337 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 6338 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6339 6340 /* build local CSR graph of subdomains' connectivity */ 6341 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6342 xadj[0] = 0; 6343 xadj[1] = PetscMax(n_neighs-1,0); 6344 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6345 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6346 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 6347 for (i=1;i<n_neighs;i++) 6348 for (j=0;j<n_shared[i];j++) 6349 count[shared[i][j]] += 1; 6350 6351 xadj_count = 0; 6352 for (i=1;i<n_neighs;i++) { 6353 for (j=0;j<n_shared[i];j++) { 6354 if (count[shared[i][j]] < threshold) { 6355 adjncy[xadj_count] = neighs[i]; 6356 adjncy_wgt[xadj_count] = n_shared[i]; 6357 xadj_count++; 6358 break; 6359 } 6360 } 6361 } 6362 xadj[1] = xadj_count; 6363 ierr = PetscFree(count);CHKERRQ(ierr); 6364 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6365 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6366 6367 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6368 6369 /* Restrict work on active processes only */ 6370 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6371 if (void_procs) { 6372 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6373 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6374 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6375 subcomm = PetscSubcommChild(psubcomm); 6376 } else { 6377 psubcomm = NULL; 6378 subcomm = PetscObjectComm((PetscObject)mat); 6379 } 6380 6381 v_wgt = NULL; 6382 if (!color) { 6383 ierr = PetscFree(xadj);CHKERRQ(ierr); 6384 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6385 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6386 } else { 6387 Mat subdomain_adj; 6388 IS new_ranks,new_ranks_contig; 6389 MatPartitioning partitioner; 6390 PetscInt rstart=0,rend=0; 6391 PetscInt *is_indices,*oldranks; 6392 PetscMPIInt size; 6393 PetscBool aggregate; 6394 6395 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6396 if (void_procs) { 6397 PetscInt prank = rank; 6398 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6399 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6400 for (i=0;i<xadj[1];i++) { 6401 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6402 } 6403 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6404 } else { 6405 oldranks = NULL; 6406 } 6407 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6408 if (aggregate) { /* TODO: all this part could be made more efficient */ 6409 PetscInt lrows,row,ncols,*cols; 6410 PetscMPIInt nrank; 6411 PetscScalar *vals; 6412 6413 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6414 lrows = 0; 6415 if (nrank<redprocs) { 6416 lrows = size/redprocs; 6417 if (nrank<size%redprocs) lrows++; 6418 } 6419 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6420 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6421 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6422 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6423 row = nrank; 6424 ncols = xadj[1]-xadj[0]; 6425 cols = adjncy; 6426 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6427 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6428 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6429 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6430 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6431 ierr = PetscFree(xadj);CHKERRQ(ierr); 6432 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6433 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6434 ierr = PetscFree(vals);CHKERRQ(ierr); 6435 if (use_vwgt) { 6436 Vec v; 6437 const PetscScalar *array; 6438 PetscInt nl; 6439 6440 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6441 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 6442 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6443 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6444 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6445 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6446 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6447 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6448 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6449 ierr = VecDestroy(&v);CHKERRQ(ierr); 6450 } 6451 } else { 6452 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6453 if (use_vwgt) { 6454 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6455 v_wgt[0] = local_size; 6456 } 6457 } 6458 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6459 6460 /* Partition */ 6461 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6462 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6463 if (v_wgt) { 6464 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6465 } 6466 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6467 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6468 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6469 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6470 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6471 6472 /* renumber new_ranks to avoid "holes" in new set of processors */ 6473 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6474 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6475 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6476 if (!aggregate) { 6477 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6478 #if defined(PETSC_USE_DEBUG) 6479 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6480 #endif 6481 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6482 } else if (oldranks) { 6483 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6484 } else { 6485 ranks_send_to_idx[0] = is_indices[0]; 6486 } 6487 } else { 6488 PetscInt idxs[1]; 6489 PetscMPIInt tag; 6490 MPI_Request *reqs; 6491 6492 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6493 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6494 for (i=rstart;i<rend;i++) { 6495 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6496 } 6497 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6498 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6499 ierr = PetscFree(reqs);CHKERRQ(ierr); 6500 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6501 #if defined(PETSC_USE_DEBUG) 6502 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6503 #endif 6504 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6505 } else if (oldranks) { 6506 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6507 } else { 6508 ranks_send_to_idx[0] = idxs[0]; 6509 } 6510 } 6511 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6512 /* clean up */ 6513 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6514 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6515 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6516 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6517 } 6518 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6519 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6520 6521 /* assemble parallel IS for sends */ 6522 i = 1; 6523 if (!color) i=0; 6524 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6525 PetscFunctionReturn(0); 6526 } 6527 6528 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6529 6530 #undef __FUNCT__ 6531 #define __FUNCT__ "PCBDDCMatISSubassemble" 6532 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6533 { 6534 Mat local_mat; 6535 IS is_sends_internal; 6536 PetscInt rows,cols,new_local_rows; 6537 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6538 PetscBool ismatis,isdense,newisdense,destroy_mat; 6539 ISLocalToGlobalMapping l2gmap; 6540 PetscInt* l2gmap_indices; 6541 const PetscInt* is_indices; 6542 MatType new_local_type; 6543 /* buffers */ 6544 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6545 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6546 PetscInt *recv_buffer_idxs_local; 6547 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6548 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6549 /* MPI */ 6550 MPI_Comm comm,comm_n; 6551 PetscSubcomm subcomm; 6552 PetscMPIInt n_sends,n_recvs,commsize; 6553 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6554 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6555 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6556 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6557 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6558 PetscErrorCode ierr; 6559 6560 PetscFunctionBegin; 6561 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6562 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6563 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6564 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6565 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6566 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6567 PetscValidLogicalCollectiveBool(mat,reuse,6); 6568 PetscValidLogicalCollectiveInt(mat,nis,8); 6569 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6570 if (nvecs) { 6571 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6572 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6573 } 6574 /* further checks */ 6575 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6576 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6577 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6578 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6579 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6580 if (reuse && *mat_n) { 6581 PetscInt mrows,mcols,mnrows,mncols; 6582 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6583 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6584 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6585 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6586 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6587 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6588 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6589 } 6590 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6591 PetscValidLogicalCollectiveInt(mat,bs,0); 6592 6593 /* prepare IS for sending if not provided */ 6594 if (!is_sends) { 6595 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6596 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6597 } else { 6598 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6599 is_sends_internal = is_sends; 6600 } 6601 6602 /* get comm */ 6603 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6604 6605 /* compute number of sends */ 6606 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6607 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6608 6609 /* compute number of receives */ 6610 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6611 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6612 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6613 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6614 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6615 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6616 ierr = PetscFree(iflags);CHKERRQ(ierr); 6617 6618 /* restrict comm if requested */ 6619 subcomm = 0; 6620 destroy_mat = PETSC_FALSE; 6621 if (restrict_comm) { 6622 PetscMPIInt color,subcommsize; 6623 6624 color = 0; 6625 if (restrict_full) { 6626 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6627 } else { 6628 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6629 } 6630 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6631 subcommsize = commsize - subcommsize; 6632 /* check if reuse has been requested */ 6633 if (reuse) { 6634 if (*mat_n) { 6635 PetscMPIInt subcommsize2; 6636 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6637 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6638 comm_n = PetscObjectComm((PetscObject)*mat_n); 6639 } else { 6640 comm_n = PETSC_COMM_SELF; 6641 } 6642 } else { /* MAT_INITIAL_MATRIX */ 6643 PetscMPIInt rank; 6644 6645 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6646 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6647 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6648 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6649 comm_n = PetscSubcommChild(subcomm); 6650 } 6651 /* flag to destroy *mat_n if not significative */ 6652 if (color) destroy_mat = PETSC_TRUE; 6653 } else { 6654 comm_n = comm; 6655 } 6656 6657 /* prepare send/receive buffers */ 6658 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6659 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6660 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6661 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6662 if (nis) { 6663 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6664 } 6665 6666 /* Get data from local matrices */ 6667 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6668 /* TODO: See below some guidelines on how to prepare the local buffers */ 6669 /* 6670 send_buffer_vals should contain the raw values of the local matrix 6671 send_buffer_idxs should contain: 6672 - MatType_PRIVATE type 6673 - PetscInt size_of_l2gmap 6674 - PetscInt global_row_indices[size_of_l2gmap] 6675 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6676 */ 6677 else { 6678 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6679 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6680 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6681 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6682 send_buffer_idxs[1] = i; 6683 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6684 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6685 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6686 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6687 for (i=0;i<n_sends;i++) { 6688 ilengths_vals[is_indices[i]] = len*len; 6689 ilengths_idxs[is_indices[i]] = len+2; 6690 } 6691 } 6692 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6693 /* additional is (if any) */ 6694 if (nis) { 6695 PetscMPIInt psum; 6696 PetscInt j; 6697 for (j=0,psum=0;j<nis;j++) { 6698 PetscInt plen; 6699 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6700 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6701 psum += len+1; /* indices + lenght */ 6702 } 6703 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6704 for (j=0,psum=0;j<nis;j++) { 6705 PetscInt plen; 6706 const PetscInt *is_array_idxs; 6707 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6708 send_buffer_idxs_is[psum] = plen; 6709 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6710 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6711 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6712 psum += plen+1; /* indices + lenght */ 6713 } 6714 for (i=0;i<n_sends;i++) { 6715 ilengths_idxs_is[is_indices[i]] = psum; 6716 } 6717 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6718 } 6719 6720 buf_size_idxs = 0; 6721 buf_size_vals = 0; 6722 buf_size_idxs_is = 0; 6723 buf_size_vecs = 0; 6724 for (i=0;i<n_recvs;i++) { 6725 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6726 buf_size_vals += (PetscInt)olengths_vals[i]; 6727 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6728 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6729 } 6730 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6731 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6732 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6733 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6734 6735 /* get new tags for clean communications */ 6736 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6737 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6738 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6739 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6740 6741 /* allocate for requests */ 6742 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6743 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6744 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6745 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6746 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6747 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6748 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6749 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6750 6751 /* communications */ 6752 ptr_idxs = recv_buffer_idxs; 6753 ptr_vals = recv_buffer_vals; 6754 ptr_idxs_is = recv_buffer_idxs_is; 6755 ptr_vecs = recv_buffer_vecs; 6756 for (i=0;i<n_recvs;i++) { 6757 source_dest = onodes[i]; 6758 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6759 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6760 ptr_idxs += olengths_idxs[i]; 6761 ptr_vals += olengths_vals[i]; 6762 if (nis) { 6763 source_dest = onodes_is[i]; 6764 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6765 ptr_idxs_is += olengths_idxs_is[i]; 6766 } 6767 if (nvecs) { 6768 source_dest = onodes[i]; 6769 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6770 ptr_vecs += olengths_idxs[i]-2; 6771 } 6772 } 6773 for (i=0;i<n_sends;i++) { 6774 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6775 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6776 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6777 if (nis) { 6778 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6779 } 6780 if (nvecs) { 6781 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6782 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6783 } 6784 } 6785 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6786 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6787 6788 /* assemble new l2g map */ 6789 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6790 ptr_idxs = recv_buffer_idxs; 6791 new_local_rows = 0; 6792 for (i=0;i<n_recvs;i++) { 6793 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6794 ptr_idxs += olengths_idxs[i]; 6795 } 6796 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6797 ptr_idxs = recv_buffer_idxs; 6798 new_local_rows = 0; 6799 for (i=0;i<n_recvs;i++) { 6800 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6801 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6802 ptr_idxs += olengths_idxs[i]; 6803 } 6804 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 6805 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 6806 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 6807 6808 /* infer new local matrix type from received local matrices type */ 6809 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 6810 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 6811 if (n_recvs) { 6812 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 6813 ptr_idxs = recv_buffer_idxs; 6814 for (i=0;i<n_recvs;i++) { 6815 if ((PetscInt)new_local_type_private != *ptr_idxs) { 6816 new_local_type_private = MATAIJ_PRIVATE; 6817 break; 6818 } 6819 ptr_idxs += olengths_idxs[i]; 6820 } 6821 switch (new_local_type_private) { 6822 case MATDENSE_PRIVATE: 6823 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 6824 new_local_type = MATSEQAIJ; 6825 bs = 1; 6826 } else { /* if I receive only 1 dense matrix */ 6827 new_local_type = MATSEQDENSE; 6828 bs = 1; 6829 } 6830 break; 6831 case MATAIJ_PRIVATE: 6832 new_local_type = MATSEQAIJ; 6833 bs = 1; 6834 break; 6835 case MATBAIJ_PRIVATE: 6836 new_local_type = MATSEQBAIJ; 6837 break; 6838 case MATSBAIJ_PRIVATE: 6839 new_local_type = MATSEQSBAIJ; 6840 break; 6841 default: 6842 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 6843 break; 6844 } 6845 } else { /* by default, new_local_type is seqdense */ 6846 new_local_type = MATSEQDENSE; 6847 bs = 1; 6848 } 6849 6850 /* create MATIS object if needed */ 6851 if (!reuse) { 6852 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 6853 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6854 } else { 6855 /* it also destroys the local matrices */ 6856 if (*mat_n) { 6857 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 6858 } else { /* this is a fake object */ 6859 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6860 } 6861 } 6862 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 6863 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 6864 6865 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6866 6867 /* Global to local map of received indices */ 6868 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 6869 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 6870 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 6871 6872 /* restore attributes -> type of incoming data and its size */ 6873 buf_size_idxs = 0; 6874 for (i=0;i<n_recvs;i++) { 6875 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 6876 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 6877 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6878 } 6879 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 6880 6881 /* set preallocation */ 6882 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 6883 if (!newisdense) { 6884 PetscInt *new_local_nnz=0; 6885 6886 ptr_idxs = recv_buffer_idxs_local; 6887 if (n_recvs) { 6888 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 6889 } 6890 for (i=0;i<n_recvs;i++) { 6891 PetscInt j; 6892 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 6893 for (j=0;j<*(ptr_idxs+1);j++) { 6894 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 6895 } 6896 } else { 6897 /* TODO */ 6898 } 6899 ptr_idxs += olengths_idxs[i]; 6900 } 6901 if (new_local_nnz) { 6902 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 6903 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 6904 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 6905 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6906 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 6907 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6908 } else { 6909 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6910 } 6911 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 6912 } else { 6913 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6914 } 6915 6916 /* set values */ 6917 ptr_vals = recv_buffer_vals; 6918 ptr_idxs = recv_buffer_idxs_local; 6919 for (i=0;i<n_recvs;i++) { 6920 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 6921 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 6922 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 6923 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6924 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6925 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 6926 } else { 6927 /* TODO */ 6928 } 6929 ptr_idxs += olengths_idxs[i]; 6930 ptr_vals += olengths_vals[i]; 6931 } 6932 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6933 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6934 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6935 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6936 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 6937 6938 #if 0 6939 if (!restrict_comm) { /* check */ 6940 Vec lvec,rvec; 6941 PetscReal infty_error; 6942 6943 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 6944 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 6945 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 6946 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 6947 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 6948 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6949 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 6950 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 6951 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 6952 } 6953 #endif 6954 6955 /* assemble new additional is (if any) */ 6956 if (nis) { 6957 PetscInt **temp_idxs,*count_is,j,psum; 6958 6959 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6960 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 6961 ptr_idxs = recv_buffer_idxs_is; 6962 psum = 0; 6963 for (i=0;i<n_recvs;i++) { 6964 for (j=0;j<nis;j++) { 6965 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6966 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 6967 psum += plen; 6968 ptr_idxs += plen+1; /* shift pointer to received data */ 6969 } 6970 } 6971 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 6972 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 6973 for (i=1;i<nis;i++) { 6974 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 6975 } 6976 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 6977 ptr_idxs = recv_buffer_idxs_is; 6978 for (i=0;i<n_recvs;i++) { 6979 for (j=0;j<nis;j++) { 6980 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6981 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 6982 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 6983 ptr_idxs += plen+1; /* shift pointer to received data */ 6984 } 6985 } 6986 for (i=0;i<nis;i++) { 6987 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6988 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 6989 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6990 } 6991 ierr = PetscFree(count_is);CHKERRQ(ierr); 6992 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 6993 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 6994 } 6995 /* free workspace */ 6996 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 6997 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6998 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 6999 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7000 if (isdense) { 7001 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7002 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7003 } else { 7004 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7005 } 7006 if (nis) { 7007 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7008 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7009 } 7010 7011 if (nvecs) { 7012 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7013 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7014 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7015 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7016 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7017 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7018 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7019 /* set values */ 7020 ptr_vals = recv_buffer_vecs; 7021 ptr_idxs = recv_buffer_idxs_local; 7022 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7023 for (i=0;i<n_recvs;i++) { 7024 PetscInt j; 7025 for (j=0;j<*(ptr_idxs+1);j++) { 7026 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7027 } 7028 ptr_idxs += olengths_idxs[i]; 7029 ptr_vals += olengths_idxs[i]-2; 7030 } 7031 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7032 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7033 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7034 } 7035 7036 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7037 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7038 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7039 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7040 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7041 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7042 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7043 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7044 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7045 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7046 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7047 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7048 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7049 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7050 ierr = PetscFree(onodes);CHKERRQ(ierr); 7051 if (nis) { 7052 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7053 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7054 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7055 } 7056 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7057 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7058 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7059 for (i=0;i<nis;i++) { 7060 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7061 } 7062 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7063 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7064 } 7065 *mat_n = NULL; 7066 } 7067 PetscFunctionReturn(0); 7068 } 7069 7070 /* temporary hack into ksp private data structure */ 7071 #include <petsc/private/kspimpl.h> 7072 7073 #undef __FUNCT__ 7074 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 7075 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7076 { 7077 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7078 PC_IS *pcis = (PC_IS*)pc->data; 7079 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7080 Mat coarsedivudotp = NULL; 7081 Mat coarseG,t_coarse_mat_is; 7082 MatNullSpace CoarseNullSpace = NULL; 7083 ISLocalToGlobalMapping coarse_islg; 7084 IS coarse_is,*isarray; 7085 PetscInt i,im_active=-1,active_procs=-1; 7086 PetscInt nis,nisdofs,nisneu,nisvert; 7087 PC pc_temp; 7088 PCType coarse_pc_type; 7089 KSPType coarse_ksp_type; 7090 PetscBool multilevel_requested,multilevel_allowed; 7091 PetscBool isredundant,isbddc,isnn,coarse_reuse; 7092 PetscInt ncoarse,nedcfield; 7093 PetscBool compute_vecs = PETSC_FALSE; 7094 PetscScalar *array; 7095 MatReuse coarse_mat_reuse; 7096 PetscBool restr, full_restr, have_void; 7097 PetscErrorCode ierr; 7098 7099 PetscFunctionBegin; 7100 /* Assign global numbering to coarse dofs */ 7101 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7102 PetscInt ocoarse_size; 7103 compute_vecs = PETSC_TRUE; 7104 ocoarse_size = pcbddc->coarse_size; 7105 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7106 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7107 /* see if we can avoid some work */ 7108 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7109 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7110 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7111 PC pc; 7112 PetscBool isbddc; 7113 7114 /* temporary workaround since PCBDDC does not have a reset method so far */ 7115 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 7116 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 7117 if (isbddc) { 7118 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 7119 } else { 7120 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7121 } 7122 coarse_reuse = PETSC_FALSE; 7123 } else { /* we can safely reuse already computed coarse matrix */ 7124 coarse_reuse = PETSC_TRUE; 7125 } 7126 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7127 coarse_reuse = PETSC_FALSE; 7128 } 7129 /* reset any subassembling information */ 7130 if (!coarse_reuse || pcbddc->recompute_topography) { 7131 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7132 } 7133 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7134 coarse_reuse = PETSC_TRUE; 7135 } 7136 /* assemble coarse matrix */ 7137 if (coarse_reuse && pcbddc->coarse_ksp) { 7138 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7139 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7140 coarse_mat_reuse = MAT_REUSE_MATRIX; 7141 } else { 7142 coarse_mat = NULL; 7143 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7144 } 7145 7146 /* creates temporary l2gmap and IS for coarse indexes */ 7147 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7148 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7149 7150 /* creates temporary MATIS object for coarse matrix */ 7151 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7152 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7153 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7154 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7155 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7156 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7157 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7158 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7159 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7160 7161 /* count "active" (i.e. with positive local size) and "void" processes */ 7162 im_active = !!(pcis->n); 7163 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7164 7165 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7166 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7167 /* full_restr : just use the receivers from the subassembling pattern */ 7168 coarse_mat_is = NULL; 7169 multilevel_allowed = PETSC_FALSE; 7170 multilevel_requested = PETSC_FALSE; 7171 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7172 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7173 if (multilevel_requested) { 7174 ncoarse = active_procs/pcbddc->coarsening_ratio; 7175 restr = PETSC_FALSE; 7176 full_restr = PETSC_FALSE; 7177 } else { 7178 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7179 restr = PETSC_TRUE; 7180 full_restr = PETSC_TRUE; 7181 } 7182 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7183 ncoarse = PetscMax(1,ncoarse); 7184 if (!pcbddc->coarse_subassembling) { 7185 if (pcbddc->coarsening_ratio > 1) { 7186 ierr = MatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7187 } else { 7188 PetscMPIInt size,rank; 7189 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 7190 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7191 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 7192 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7193 } 7194 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7195 PetscInt psum; 7196 PetscMPIInt size; 7197 if (pcbddc->coarse_ksp) psum = 1; 7198 else psum = 0; 7199 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7200 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 7201 if (ncoarse < size) have_void = PETSC_TRUE; 7202 } 7203 /* determine if we can go multilevel */ 7204 if (multilevel_requested) { 7205 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7206 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7207 } 7208 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7209 7210 /* dump subassembling pattern */ 7211 if (pcbddc->dbg_flag && multilevel_allowed) { 7212 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7213 } 7214 7215 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7216 nedcfield = -1; 7217 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7218 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7219 const PetscInt *idxs; 7220 ISLocalToGlobalMapping tmap; 7221 7222 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7223 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7224 /* allocate space for temporary storage */ 7225 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7226 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7227 /* allocate for IS array */ 7228 nisdofs = pcbddc->n_ISForDofsLocal; 7229 if (pcbddc->nedclocal) { 7230 if (pcbddc->nedfield > -1) { 7231 nedcfield = pcbddc->nedfield; 7232 } else { 7233 nedcfield = 0; 7234 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7235 nisdofs = 1; 7236 } 7237 } 7238 nisneu = !!pcbddc->NeumannBoundariesLocal; 7239 nisvert = 0; /* nisvert is not used */ 7240 nis = nisdofs + nisneu + nisvert; 7241 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7242 /* dofs splitting */ 7243 for (i=0;i<nisdofs;i++) { 7244 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7245 if (nedcfield != i) { 7246 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7247 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7248 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7249 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7250 } else { 7251 ierr = ISView(pcbddc->nedclocal,NULL);CHKERRQ(ierr); 7252 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7253 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7254 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7255 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7256 } 7257 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7258 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7259 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7260 } 7261 /* neumann boundaries */ 7262 if (pcbddc->NeumannBoundariesLocal) { 7263 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7264 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7265 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7266 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7267 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7268 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7269 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7270 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7271 } 7272 /* free memory */ 7273 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7274 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7275 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7276 } else { 7277 nis = 0; 7278 nisdofs = 0; 7279 nisneu = 0; 7280 nisvert = 0; 7281 isarray = NULL; 7282 } 7283 /* destroy no longer needed map */ 7284 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7285 7286 /* subassemble */ 7287 if (multilevel_allowed) { 7288 Vec vp[1]; 7289 PetscInt nvecs = 0; 7290 PetscBool reuse,reuser; 7291 7292 if (coarse_mat) reuse = PETSC_TRUE; 7293 else reuse = PETSC_FALSE; 7294 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7295 vp[0] = NULL; 7296 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7297 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7298 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7299 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7300 nvecs = 1; 7301 7302 if (pcbddc->divudotp) { 7303 Mat B,loc_divudotp; 7304 Vec v,p; 7305 IS dummy; 7306 PetscInt np; 7307 7308 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7309 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7310 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7311 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7312 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7313 ierr = VecSet(p,1.);CHKERRQ(ierr); 7314 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7315 ierr = VecDestroy(&p);CHKERRQ(ierr); 7316 ierr = MatDestroy(&B);CHKERRQ(ierr); 7317 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7318 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7319 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7320 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7321 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7322 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7323 ierr = VecDestroy(&v);CHKERRQ(ierr); 7324 } 7325 } 7326 if (reuser) { 7327 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7328 } else { 7329 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7330 } 7331 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7332 PetscScalar *arraym,*arrayv; 7333 PetscInt nl; 7334 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7335 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7336 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7337 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7338 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7339 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7340 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7341 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7342 } else { 7343 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7344 } 7345 } else { 7346 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7347 } 7348 if (coarse_mat_is || coarse_mat) { 7349 PetscMPIInt size; 7350 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7351 if (!multilevel_allowed) { 7352 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7353 } else { 7354 Mat A; 7355 7356 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7357 if (coarse_mat_is) { 7358 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7359 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7360 coarse_mat = coarse_mat_is; 7361 } 7362 /* be sure we don't have MatSeqDENSE as local mat */ 7363 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7364 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7365 } 7366 } 7367 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7368 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7369 7370 /* create local to global scatters for coarse problem */ 7371 if (compute_vecs) { 7372 PetscInt lrows; 7373 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7374 if (coarse_mat) { 7375 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7376 } else { 7377 lrows = 0; 7378 } 7379 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7380 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7381 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7382 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7383 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7384 } 7385 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7386 7387 /* set defaults for coarse KSP and PC */ 7388 if (multilevel_allowed) { 7389 coarse_ksp_type = KSPRICHARDSON; 7390 coarse_pc_type = PCBDDC; 7391 } else { 7392 coarse_ksp_type = KSPPREONLY; 7393 coarse_pc_type = PCREDUNDANT; 7394 } 7395 7396 /* print some info if requested */ 7397 if (pcbddc->dbg_flag) { 7398 if (!multilevel_allowed) { 7399 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7400 if (multilevel_requested) { 7401 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7402 } else if (pcbddc->max_levels) { 7403 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7404 } 7405 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7406 } 7407 } 7408 7409 /* communicate coarse discrete gradient */ 7410 coarseG = NULL; 7411 if (pcbddc->nedcG && multilevel_allowed) { 7412 MPI_Comm ccomm; 7413 if (coarse_mat) { 7414 ccomm = PetscObjectComm((PetscObject)coarse_mat); 7415 } else { 7416 ccomm = MPI_COMM_NULL; 7417 } 7418 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 7419 } 7420 7421 /* create the coarse KSP object only once with defaults */ 7422 if (coarse_mat) { 7423 PetscViewer dbg_viewer = NULL; 7424 if (pcbddc->dbg_flag) { 7425 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7426 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7427 } 7428 if (!pcbddc->coarse_ksp) { 7429 char prefix[256],str_level[16]; 7430 size_t len; 7431 7432 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7433 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7434 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7435 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7436 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7437 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7438 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7439 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7440 /* TODO is this logic correct? should check for coarse_mat type */ 7441 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7442 /* prefix */ 7443 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7444 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7445 if (!pcbddc->current_level) { 7446 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7447 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7448 } else { 7449 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7450 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7451 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7452 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7453 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7454 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7455 } 7456 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7457 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7458 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7459 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7460 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7461 /* allow user customization */ 7462 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7463 } 7464 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7465 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7466 if (nisdofs) { 7467 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7468 for (i=0;i<nisdofs;i++) { 7469 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7470 } 7471 } 7472 if (nisneu) { 7473 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7474 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7475 } 7476 if (nisvert) { 7477 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7478 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7479 } 7480 if (coarseG) { 7481 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 7482 } 7483 7484 /* get some info after set from options */ 7485 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7486 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7487 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7488 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 7489 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7490 isbddc = PETSC_FALSE; 7491 } 7492 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7493 if (isredundant) { 7494 KSP inner_ksp; 7495 PC inner_pc; 7496 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7497 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7498 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7499 } 7500 7501 /* parameters which miss an API */ 7502 if (isbddc) { 7503 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7504 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7505 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7506 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7507 if (pcbddc_coarse->benign_saddle_point) { 7508 Mat coarsedivudotp_is; 7509 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7510 IS row,col; 7511 const PetscInt *gidxs; 7512 PetscInt n,st,M,N; 7513 7514 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7515 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7516 st = st-n; 7517 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7518 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7519 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7520 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7521 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7522 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7523 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7524 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7525 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7526 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7527 ierr = ISDestroy(&row);CHKERRQ(ierr); 7528 ierr = ISDestroy(&col);CHKERRQ(ierr); 7529 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7530 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7531 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7532 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7533 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7534 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7535 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7536 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7537 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7538 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7539 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7540 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7541 } 7542 } 7543 7544 /* propagate symmetry info of coarse matrix */ 7545 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7546 if (pc->pmat->symmetric_set) { 7547 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7548 } 7549 if (pc->pmat->hermitian_set) { 7550 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7551 } 7552 if (pc->pmat->spd_set) { 7553 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7554 } 7555 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7556 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7557 } 7558 /* set operators */ 7559 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7560 if (pcbddc->dbg_flag) { 7561 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7562 } 7563 } 7564 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 7565 ierr = PetscFree(isarray);CHKERRQ(ierr); 7566 #if 0 7567 { 7568 PetscViewer viewer; 7569 char filename[256]; 7570 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7571 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7572 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7573 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7574 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7575 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7576 } 7577 #endif 7578 7579 if (pcbddc->coarse_ksp) { 7580 Vec crhs,csol; 7581 7582 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7583 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7584 if (!csol) { 7585 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7586 } 7587 if (!crhs) { 7588 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7589 } 7590 } 7591 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7592 7593 /* compute null space for coarse solver if the benign trick has been requested */ 7594 if (pcbddc->benign_null) { 7595 7596 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7597 for (i=0;i<pcbddc->benign_n;i++) { 7598 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7599 } 7600 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7601 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7602 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7603 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7604 if (coarse_mat) { 7605 Vec nullv; 7606 PetscScalar *array,*array2; 7607 PetscInt nl; 7608 7609 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7610 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7611 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7612 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7613 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7614 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7615 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7616 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7617 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7618 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7619 } 7620 } 7621 7622 if (pcbddc->coarse_ksp) { 7623 PetscBool ispreonly; 7624 7625 if (CoarseNullSpace) { 7626 PetscBool isnull; 7627 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7628 if (isnull) { 7629 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7630 } 7631 /* TODO: add local nullspaces (if any) */ 7632 } 7633 /* setup coarse ksp */ 7634 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7635 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7636 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7637 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7638 KSP check_ksp; 7639 KSPType check_ksp_type; 7640 PC check_pc; 7641 Vec check_vec,coarse_vec; 7642 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7643 PetscInt its; 7644 PetscBool compute_eigs; 7645 PetscReal *eigs_r,*eigs_c; 7646 PetscInt neigs; 7647 const char *prefix; 7648 7649 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7650 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7651 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7652 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7653 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7654 /* prevent from setup unneeded object */ 7655 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7656 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7657 if (ispreonly) { 7658 check_ksp_type = KSPPREONLY; 7659 compute_eigs = PETSC_FALSE; 7660 } else { 7661 check_ksp_type = KSPGMRES; 7662 compute_eigs = PETSC_TRUE; 7663 } 7664 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7665 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7666 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7667 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7668 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7669 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7670 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7671 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7672 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7673 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7674 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7675 /* create random vec */ 7676 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7677 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7678 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7679 /* solve coarse problem */ 7680 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7681 /* set eigenvalue estimation if preonly has not been requested */ 7682 if (compute_eigs) { 7683 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7684 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7685 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7686 if (neigs) { 7687 lambda_max = eigs_r[neigs-1]; 7688 lambda_min = eigs_r[0]; 7689 if (pcbddc->use_coarse_estimates) { 7690 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7691 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7692 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7693 } 7694 } 7695 } 7696 } 7697 7698 /* check coarse problem residual error */ 7699 if (pcbddc->dbg_flag) { 7700 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7701 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7702 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7703 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7704 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7705 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7706 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7707 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7708 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7709 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7710 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7711 if (CoarseNullSpace) { 7712 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7713 } 7714 if (compute_eigs) { 7715 PetscReal lambda_max_s,lambda_min_s; 7716 KSPConvergedReason reason; 7717 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7718 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7719 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 7720 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7721 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7722 for (i=0;i<neigs;i++) { 7723 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7724 } 7725 } 7726 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7727 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7728 } 7729 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7730 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7731 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7732 if (compute_eigs) { 7733 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7734 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7735 } 7736 } 7737 } 7738 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7739 /* print additional info */ 7740 if (pcbddc->dbg_flag) { 7741 /* waits until all processes reaches this point */ 7742 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7743 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7744 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7745 } 7746 7747 /* free memory */ 7748 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7749 PetscFunctionReturn(0); 7750 } 7751 7752 #undef __FUNCT__ 7753 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 7754 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7755 { 7756 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7757 PC_IS* pcis = (PC_IS*)pc->data; 7758 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7759 IS subset,subset_mult,subset_n; 7760 PetscInt local_size,coarse_size=0; 7761 PetscInt *local_primal_indices=NULL; 7762 const PetscInt *t_local_primal_indices; 7763 PetscErrorCode ierr; 7764 7765 PetscFunctionBegin; 7766 /* Compute global number of coarse dofs */ 7767 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7768 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7769 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7770 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7771 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7772 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7773 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7774 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7775 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7776 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7777 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7778 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7779 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7780 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7781 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7782 7783 /* check numbering */ 7784 if (pcbddc->dbg_flag) { 7785 PetscScalar coarsesum,*array,*array2; 7786 PetscInt i; 7787 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7788 7789 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7790 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7791 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7792 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7793 /* counter */ 7794 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7795 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7796 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7797 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7798 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7799 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7800 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7801 for (i=0;i<pcbddc->local_primal_size;i++) { 7802 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7803 } 7804 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7805 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7806 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7807 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7808 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7809 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7810 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7811 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7812 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7813 for (i=0;i<pcis->n;i++) { 7814 if (array[i] != 0.0 && array[i] != array2[i]) { 7815 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 7816 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 7817 set_error = PETSC_TRUE; 7818 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 7819 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 7820 } 7821 } 7822 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7823 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7824 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7825 for (i=0;i<pcis->n;i++) { 7826 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 7827 } 7828 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7829 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7830 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7831 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7832 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 7833 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 7834 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 7835 PetscInt *gidxs; 7836 7837 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 7838 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 7839 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 7840 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7841 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 7842 for (i=0;i<pcbddc->local_primal_size;i++) { 7843 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 7844 } 7845 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7846 ierr = PetscFree(gidxs);CHKERRQ(ierr); 7847 } 7848 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7849 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7850 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 7851 } 7852 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 7853 /* get back data */ 7854 *coarse_size_n = coarse_size; 7855 *local_primal_indices_n = local_primal_indices; 7856 PetscFunctionReturn(0); 7857 } 7858 7859 #undef __FUNCT__ 7860 #define __FUNCT__ "PCBDDCGlobalToLocal" 7861 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 7862 { 7863 IS localis_t; 7864 PetscInt i,lsize,*idxs,n; 7865 PetscScalar *vals; 7866 PetscErrorCode ierr; 7867 7868 PetscFunctionBegin; 7869 /* get indices in local ordering exploiting local to global map */ 7870 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 7871 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 7872 for (i=0;i<lsize;i++) vals[i] = 1.0; 7873 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7874 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 7875 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 7876 if (idxs) { /* multilevel guard */ 7877 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 7878 } 7879 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 7880 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7881 ierr = PetscFree(vals);CHKERRQ(ierr); 7882 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 7883 /* now compute set in local ordering */ 7884 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7885 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7886 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7887 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 7888 for (i=0,lsize=0;i<n;i++) { 7889 if (PetscRealPart(vals[i]) > 0.5) { 7890 lsize++; 7891 } 7892 } 7893 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 7894 for (i=0,lsize=0;i<n;i++) { 7895 if (PetscRealPart(vals[i]) > 0.5) { 7896 idxs[lsize++] = i; 7897 } 7898 } 7899 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7900 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 7901 *localis = localis_t; 7902 PetscFunctionReturn(0); 7903 } 7904 7905 #undef __FUNCT__ 7906 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 7907 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 7908 { 7909 PC_IS *pcis=(PC_IS*)pc->data; 7910 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7911 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 7912 Mat S_j; 7913 PetscInt *used_xadj,*used_adjncy; 7914 PetscBool free_used_adj; 7915 PetscErrorCode ierr; 7916 7917 PetscFunctionBegin; 7918 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 7919 free_used_adj = PETSC_FALSE; 7920 if (pcbddc->sub_schurs_layers == -1) { 7921 used_xadj = NULL; 7922 used_adjncy = NULL; 7923 } else { 7924 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 7925 used_xadj = pcbddc->mat_graph->xadj; 7926 used_adjncy = pcbddc->mat_graph->adjncy; 7927 } else if (pcbddc->computed_rowadj) { 7928 used_xadj = pcbddc->mat_graph->xadj; 7929 used_adjncy = pcbddc->mat_graph->adjncy; 7930 } else { 7931 PetscBool flg_row=PETSC_FALSE; 7932 const PetscInt *xadj,*adjncy; 7933 PetscInt nvtxs; 7934 7935 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7936 if (flg_row) { 7937 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 7938 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 7939 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 7940 free_used_adj = PETSC_TRUE; 7941 } else { 7942 pcbddc->sub_schurs_layers = -1; 7943 used_xadj = NULL; 7944 used_adjncy = NULL; 7945 } 7946 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7947 } 7948 } 7949 7950 /* setup sub_schurs data */ 7951 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7952 if (!sub_schurs->schur_explicit) { 7953 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 7954 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7955 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 7956 } else { 7957 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 7958 PetscBool isseqaij,need_change = PETSC_FALSE; 7959 PetscInt benign_n; 7960 Mat change = NULL; 7961 Vec scaling = NULL; 7962 IS change_primal = NULL; 7963 7964 if (!pcbddc->use_vertices && reuse_solvers) { 7965 PetscInt n_vertices; 7966 7967 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 7968 reuse_solvers = (PetscBool)!n_vertices; 7969 } 7970 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 7971 if (!isseqaij) { 7972 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7973 if (matis->A == pcbddc->local_mat) { 7974 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 7975 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7976 } else { 7977 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7978 } 7979 } 7980 if (!pcbddc->benign_change_explicit) { 7981 benign_n = pcbddc->benign_n; 7982 } else { 7983 benign_n = 0; 7984 } 7985 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 7986 We need a global reduction to avoid possible deadlocks. 7987 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 7988 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 7989 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 7990 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7991 need_change = (PetscBool)(!need_change); 7992 } 7993 /* If the user defines additional constraints, we import them here. 7994 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 7995 if (need_change) { 7996 PC_IS *pcisf; 7997 PC_BDDC *pcbddcf; 7998 PC pcf; 7999 8000 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8001 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8002 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8003 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8004 /* hacks */ 8005 pcisf = (PC_IS*)pcf->data; 8006 pcisf->is_B_local = pcis->is_B_local; 8007 pcisf->vec1_N = pcis->vec1_N; 8008 pcisf->BtoNmap = pcis->BtoNmap; 8009 pcisf->n = pcis->n; 8010 pcisf->n_B = pcis->n_B; 8011 pcbddcf = (PC_BDDC*)pcf->data; 8012 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8013 pcbddcf->mat_graph = pcbddc->mat_graph; 8014 pcbddcf->use_faces = PETSC_TRUE; 8015 pcbddcf->use_change_of_basis = PETSC_TRUE; 8016 pcbddcf->use_change_on_faces = PETSC_TRUE; 8017 pcbddcf->use_qr_single = PETSC_TRUE; 8018 pcbddcf->fake_change = PETSC_TRUE; 8019 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8020 /* store information on primal vertices and change of basis (in local numbering) */ 8021 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8022 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8023 change = pcbddcf->ConstraintMatrix; 8024 pcbddcf->ConstraintMatrix = NULL; 8025 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8026 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8027 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8028 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8029 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8030 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8031 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8032 pcf->ops->destroy = NULL; 8033 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8034 } 8035 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8036 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8037 ierr = MatDestroy(&change);CHKERRQ(ierr); 8038 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8039 } 8040 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8041 8042 /* free adjacency */ 8043 if (free_used_adj) { 8044 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8045 } 8046 PetscFunctionReturn(0); 8047 } 8048 8049 #undef __FUNCT__ 8050 #define __FUNCT__ "PCBDDCInitSubSchurs" 8051 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8052 { 8053 PC_IS *pcis=(PC_IS*)pc->data; 8054 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8055 PCBDDCGraph graph; 8056 PetscErrorCode ierr; 8057 8058 PetscFunctionBegin; 8059 /* attach interface graph for determining subsets */ 8060 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8061 IS verticesIS,verticescomm; 8062 PetscInt vsize,*idxs; 8063 8064 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8065 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8066 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8067 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8068 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8069 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8070 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8071 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8072 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8073 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8074 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8075 } else { 8076 graph = pcbddc->mat_graph; 8077 } 8078 /* print some info */ 8079 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8080 IS vertices; 8081 PetscInt nv,nedges,nfaces; 8082 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8083 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8084 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8085 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8086 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8087 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8088 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8089 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8090 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8091 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8092 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8093 } 8094 8095 /* sub_schurs init */ 8096 if (!pcbddc->sub_schurs) { 8097 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8098 } 8099 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8100 8101 /* free graph struct */ 8102 if (pcbddc->sub_schurs_rebuild) { 8103 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8104 } 8105 PetscFunctionReturn(0); 8106 } 8107 8108 #undef __FUNCT__ 8109 #define __FUNCT__ "PCBDDCCheckOperator" 8110 PetscErrorCode PCBDDCCheckOperator(PC pc) 8111 { 8112 PC_IS *pcis=(PC_IS*)pc->data; 8113 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8114 PetscErrorCode ierr; 8115 8116 PetscFunctionBegin; 8117 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8118 IS zerodiag = NULL; 8119 Mat S_j,B0_B=NULL; 8120 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8121 PetscScalar *p0_check,*array,*array2; 8122 PetscReal norm; 8123 PetscInt i; 8124 8125 /* B0 and B0_B */ 8126 if (zerodiag) { 8127 IS dummy; 8128 8129 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8130 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8131 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8132 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8133 } 8134 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8135 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8136 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8137 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8138 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8139 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8140 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8141 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8142 /* S_j */ 8143 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8144 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8145 8146 /* mimic vector in \widetilde{W}_\Gamma */ 8147 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8148 /* continuous in primal space */ 8149 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8150 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8151 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8152 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8153 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8154 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8155 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8156 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8157 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8158 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8159 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8160 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8161 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8162 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8163 8164 /* assemble rhs for coarse problem */ 8165 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8166 /* local with Schur */ 8167 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8168 if (zerodiag) { 8169 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8170 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8171 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8172 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8173 } 8174 /* sum on primal nodes the local contributions */ 8175 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8176 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8177 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8178 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8179 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8180 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8181 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8182 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8183 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8184 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8185 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8186 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8187 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8188 /* scale primal nodes (BDDC sums contibutions) */ 8189 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8190 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8191 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8192 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8193 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8194 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8195 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8196 /* global: \widetilde{B0}_B w_\Gamma */ 8197 if (zerodiag) { 8198 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8199 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8200 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8201 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8202 } 8203 /* BDDC */ 8204 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8205 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8206 8207 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8208 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8209 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8210 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8211 for (i=0;i<pcbddc->benign_n;i++) { 8212 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8213 } 8214 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8215 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8216 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8217 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8218 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8219 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8220 } 8221 PetscFunctionReturn(0); 8222 } 8223 8224 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8225 #undef __FUNCT__ 8226 #define __FUNCT__ "MatMPIAIJRestrict" 8227 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8228 { 8229 Mat At; 8230 IS rows; 8231 MPI_Comm comm; 8232 PetscInt rst,ren; 8233 PetscErrorCode ierr; 8234 PetscLayout rmap; 8235 8236 PetscFunctionBegin; 8237 rst = ren = 0; 8238 if (ccomm != MPI_COMM_NULL) { 8239 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8240 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8241 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8242 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8243 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8244 } 8245 ierr = ISCreateStride(comm,ren-rst,rst,1,&rows);CHKERRQ(ierr); 8246 ierr = MatGetSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8247 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8248 8249 if (ccomm != MPI_COMM_NULL) { 8250 Mat_MPIAIJ *a,*b; 8251 IS from,to; 8252 Vec gvec; 8253 PetscInt lsize; 8254 8255 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8256 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8257 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8258 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8259 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8260 a = (Mat_MPIAIJ*)At->data; 8261 b = (Mat_MPIAIJ*)(*B)->data; 8262 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8263 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8264 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8265 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8266 b->A = a->A; 8267 b->B = a->B; 8268 8269 b->donotstash = a->donotstash; 8270 b->roworiented = a->roworiented; 8271 b->rowindices = 0; 8272 b->rowvalues = 0; 8273 b->getrowactive = PETSC_FALSE; 8274 8275 (*B)->rmap = rmap; 8276 (*B)->factortype = A->factortype; 8277 (*B)->assembled = PETSC_TRUE; 8278 (*B)->insertmode = NOT_SET_VALUES; 8279 (*B)->preallocated = PETSC_TRUE; 8280 8281 if (a->colmap) { 8282 #if defined(PETSC_USE_CTABLE) 8283 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8284 #else 8285 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8286 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8287 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8288 #endif 8289 } else b->colmap = 0; 8290 if (a->garray) { 8291 PetscInt len; 8292 len = a->B->cmap->n; 8293 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8294 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8295 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8296 } else b->garray = 0; 8297 8298 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8299 b->lvec = a->lvec; 8300 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8301 8302 /* cannot use VecScatterCopy */ 8303 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8304 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8305 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8306 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8307 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8308 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8309 ierr = ISDestroy(&from);CHKERRQ(ierr); 8310 ierr = ISDestroy(&to);CHKERRQ(ierr); 8311 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8312 ierr = PetscObjectSetName((PetscObject)*B,"coarse_restrict_G");CHKERRQ(ierr); 8313 ierr = MatView(*B,NULL);CHKERRQ(ierr); 8314 } 8315 ierr = MatDestroy(&At);CHKERRQ(ierr); 8316 PetscFunctionReturn(0); 8317 } 8318