1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 8 9 /* returns B s.t. range(B) _|_ range(A) */ 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatDense_OrthogonalComplement" 12 PetscErrorCode MatDense_OrthogonalComplement(Mat A, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 13 { 14 #if !defined(PETSC_USE_COMPLEX) 15 PetscScalar *uwork,*data,*U, ds = 0.; 16 PetscReal *sing; 17 PetscBLASInt bM,bN,lwork,lierr,di = 1; 18 PetscInt ulw,i,nr,nc,n; 19 PetscErrorCode ierr; 20 21 PetscFunctionBegin; 22 #if defined(PETSC_MISSING_LAPACK_GESVD) 23 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 24 #endif 25 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 26 if (!nr || !nc) PetscFunctionReturn(0); 27 28 /* workspace */ 29 if (!work) { 30 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 31 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 32 } else { 33 ulw = lw; 34 uwork = work; 35 } 36 n = PetscMin(nr,nc); 37 if (!rwork) { 38 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 39 } else { 40 sing = rwork; 41 } 42 43 /* SVD */ 44 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 46 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 47 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 48 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 49 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 50 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 51 ierr = PetscFPTrapPop();CHKERRQ(ierr); 52 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 53 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 54 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 55 if (!rwork) { 56 ierr = PetscFree(sing);CHKERRQ(ierr); 57 } 58 if (!work) { 59 ierr = PetscFree(uwork);CHKERRQ(ierr); 60 } 61 /* create B */ 62 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 63 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 64 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 65 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 66 ierr = PetscFree(U);CHKERRQ(ierr); 67 #else 68 PetscFunctionBegin; 69 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 70 #endif 71 PetscFunctionReturn(0); 72 } 73 74 /* TODO REMOVE */ 75 #if defined(PRINT_GDET) 76 static int inc = 0; 77 static int lev = 0; 78 #endif 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "PCBDDCComputeNedelecChangeEdge" 82 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 83 { 84 PetscErrorCode ierr; 85 Mat GE,GEd; 86 PetscInt rsize,csize,esize; 87 PetscScalar *ptr; 88 89 PetscFunctionBegin; 90 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 91 if (!esize) PetscFunctionReturn(0); 92 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 93 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 94 95 /* gradients */ 96 ptr = work + 5*esize; 97 ierr = MatGetSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 98 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 99 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 100 ierr = MatDestroy(&GE);CHKERRQ(ierr); 101 102 /* constants */ 103 ptr += rsize*csize; 104 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 105 ierr = MatGetSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 107 ierr = MatDestroy(&GE);CHKERRQ(ierr); 108 ierr = MatDense_OrthogonalComplement(GEd,5*esize,work,rwork,GKins);CHKERRQ(ierr); 109 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 110 111 if (corners) { 112 Mat GEc; 113 PetscScalar *vals,v; 114 115 ierr = MatGetSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 116 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 117 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 118 /* v = PetscAbsScalar(vals[0]) */; 119 v = 1.; 120 cvals[0] = vals[0]/v; 121 cvals[1] = vals[1]/v; 122 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 123 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 124 #if defined(PRINT_GDET) 125 { 126 PetscViewer viewer; 127 char filename[256]; 128 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 129 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 130 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 131 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 132 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 133 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 134 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 135 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 136 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 137 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 138 } 139 #endif 140 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 141 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 142 } 143 144 PetscFunctionReturn(0); 145 } 146 147 #undef __FUNCT__ 148 #define __FUNCT__ "PCBDDCNedelecSupport" 149 PetscErrorCode PCBDDCNedelecSupport(PC pc) 150 { 151 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 152 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 153 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 154 Vec tvec; 155 PetscSF sfv; 156 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 157 MPI_Comm comm; 158 IS lned,primals,allprimals,nedfieldlocal; 159 IS *eedges,*extrows,*extcols,*alleedges; 160 PetscBT btv,bte,btvc,btb,btvcand,btvi,btee,bter; 161 PetscScalar *vals,*work; 162 PetscReal *rwork; 163 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 164 PetscInt ne,nv,Lv,order,n,field; 165 PetscInt n_neigh,*neigh,*n_shared,**shared; 166 PetscInt i,j,extmem,cum,maxsize,nee; 167 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 168 PetscInt *sfvleaves,*sfvroots; 169 PetscInt *corners,*cedges; 170 PetscInt *ecount,**eneighs,*vcount,**vneighs; 171 #if defined(PETSC_USE_DEBUG) 172 PetscInt *emarks; 173 #endif 174 PetscBool print,eerr,done,lrc[2],conforming,global; 175 PetscErrorCode ierr; 176 177 PetscFunctionBegin; 178 /* test variable order code and print debug info TODO: to be removed */ 179 print = PETSC_FALSE; 180 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_print_nedelec",&print,NULL);CHKERRQ(ierr); 181 ierr = PetscOptionsGetInt(NULL,NULL,"-pc_bddc_nedelec_order",&pcbddc->nedorder,NULL);CHKERRQ(ierr); 182 183 /* Return to caller if there are no edges in the decomposition */ 184 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 185 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 186 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 187 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 188 lrc[0] = PETSC_FALSE; 189 for (i=0;i<n;i++) { 190 if (PetscRealPart(vals[i]) > 2.) { 191 lrc[0] = PETSC_TRUE; 192 break; 193 } 194 } 195 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 196 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 197 if (!lrc[1]) PetscFunctionReturn(0); 198 199 /* If the discrete gradient is defined for a subset of dofs and global is true, 200 it assumes G is given in global ordering for all the dofs. 201 Otherwise, the ordering is global for the Nedelec field */ 202 order = pcbddc->nedorder; 203 conforming = pcbddc->conforming; 204 field = pcbddc->nedfield; 205 global = pcbddc->nedglobal; 206 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 207 if (pcbddc->n_ISForDofsLocal && field > -1) { 208 PetscBool setprimal = PETSC_FALSE; 209 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_nedelec_field_primal",&setprimal,NULL);CHKERRQ(ierr); 210 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 211 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 212 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 213 if (setprimal) { 214 IS enedfieldlocal; 215 PetscInt *eidxs; 216 217 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 218 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 219 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 220 for (i=0,cum=0;i<ne;i++) { 221 if (PetscRealPart(vals[idxs[i]]) > 2.) { 222 eidxs[cum++] = idxs[i]; 223 } 224 } 225 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 226 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 227 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 228 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 229 ierr = PetscFree(eidxs);CHKERRQ(ierr); 230 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 231 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 232 PetscFunctionReturn(0); 233 } 234 } else if (!pcbddc->n_ISForDofsLocal) { 235 PetscBool testnedfield = PETSC_FALSE; 236 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_nedelec_field",&testnedfield,NULL);CHKERRQ(ierr); 237 if (!testnedfield) { 238 ne = n; 239 nedfieldlocal = NULL; 240 } else { 241 /* ierr = ISCreateStride(comm,n,0,1,&nedfieldlocal);CHKERRQ(ierr); */ 242 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 243 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 244 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 245 for (i=0;i<n;i++) matis->sf_leafdata[i] = 1; 246 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 247 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 248 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 249 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 250 for (i=0,cum=0;i<n;i++) { 251 if (matis->sf_leafdata[i] > 1) { 252 matis->sf_leafdata[cum++] = i; 253 } 254 } 255 ierr = ISCreateGeneral(comm,cum,matis->sf_leafdata,PETSC_COPY_VALUES,&nedfieldlocal);CHKERRQ(ierr); 256 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 257 } 258 global = PETSC_TRUE; 259 } else { 260 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 261 } 262 263 if (nedfieldlocal) { /* merge with previous code when testing is done */ 264 IS is; 265 266 /* need to map from the local Nedelec field to local numbering */ 267 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 268 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 269 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 270 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 271 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 272 if (global) { 273 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 274 el2g = al2g; 275 } else { 276 IS gis; 277 278 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 279 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 280 ierr = ISDestroy(&gis);CHKERRQ(ierr); 281 } 282 ierr = ISDestroy(&is);CHKERRQ(ierr); 283 } else { 284 /* restore default */ 285 pcbddc->nedfield = -1; 286 /* one ref for the destruction of al2g, one for el2g */ 287 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 288 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 289 el2g = al2g; 290 fl2g = NULL; 291 } 292 293 /* Sanity checks */ 294 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 295 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 296 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 297 298 /* Drop connections for interior edges */ 299 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 300 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 301 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 302 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 303 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 304 if (nedfieldlocal) { 305 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 306 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 307 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 308 } else { 309 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 310 } 311 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 312 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 313 if (global) { 314 PetscInt rst; 315 316 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 317 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 318 if (matis->sf_rootdata[i] < 2) { 319 matis->sf_rootdata[cum++] = i + rst; 320 } 321 } 322 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 323 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 324 } else { 325 PetscInt *tbz; 326 327 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 328 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 329 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 330 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 331 for (i=0,cum=0;i<ne;i++) 332 if (matis->sf_leafdata[idxs[i]] == 1) 333 tbz[cum++] = i; 334 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 335 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 336 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 337 ierr = PetscFree(tbz);CHKERRQ(ierr); 338 } 339 340 /* Extract subdomain relevant rows of G */ 341 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 342 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 343 ierr = MatGetSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 344 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 345 ierr = ISDestroy(&lned);CHKERRQ(ierr); 346 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 347 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 348 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 349 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 350 if (print) { 351 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 352 ierr = MatView(lG,NULL);CHKERRQ(ierr); 353 } 354 355 /* SF for nodal communications */ 356 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 357 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 358 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 359 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 360 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 361 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 362 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 363 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 364 ierr = PetscMalloc2(nv,&sfvleaves,Lv,&sfvroots);CHKERRQ(ierr); 365 366 /* Destroy temporary G created in MATIS format and modified G */ 367 ierr = MatDestroy(&G);CHKERRQ(ierr); 368 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 369 370 /* Save lG */ 371 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 372 373 /* Analyze the edge-nodes connections (duplicate lG) */ 374 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 375 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 376 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 377 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 378 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 379 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 380 /* need to import the boundary specification to ensure the 381 proper detection of coarse edges' endpoints */ 382 if (pcbddc->DirichletBoundariesLocal) { 383 IS is; 384 385 if (fl2g) { 386 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 387 } else { 388 is = pcbddc->DirichletBoundariesLocal; 389 } 390 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 391 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 392 for (i=0;i<cum;i++) { 393 if (idxs[i] >= 0) { 394 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 395 } 396 } 397 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 398 if (fl2g) { 399 ierr = ISDestroy(&is);CHKERRQ(ierr); 400 } 401 } 402 if (pcbddc->NeumannBoundariesLocal) { 403 IS is; 404 405 if (fl2g) { 406 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 407 } else { 408 is = pcbddc->NeumannBoundariesLocal; 409 } 410 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 411 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 412 for (i=0;i<cum;i++) { 413 if (idxs[i] >= 0) { 414 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 415 } 416 } 417 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 418 if (fl2g) { 419 ierr = ISDestroy(&is);CHKERRQ(ierr); 420 } 421 } 422 423 /* count neighs per dof */ 424 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 425 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 426 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 427 for (i=1,cum=0;i<n_neigh;i++) { 428 cum += n_shared[i]; 429 for (j=0;j<n_shared[i];j++) { 430 ecount[shared[i][j]]++; 431 } 432 } 433 if (ne) { 434 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 435 } 436 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 437 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 438 for (i=1;i<n_neigh;i++) { 439 for (j=0;j<n_shared[i];j++) { 440 PetscInt k = shared[i][j]; 441 eneighs[k][ecount[k]] = neigh[i]; 442 ecount[k]++; 443 } 444 } 445 for (i=0;i<ne;i++) { 446 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 447 } 448 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 449 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 450 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 451 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 452 for (i=1,cum=0;i<n_neigh;i++) { 453 cum += n_shared[i]; 454 for (j=0;j<n_shared[i];j++) { 455 vcount[shared[i][j]]++; 456 } 457 } 458 if (nv) { 459 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 460 } 461 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 462 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 463 for (i=1;i<n_neigh;i++) { 464 for (j=0;j<n_shared[i];j++) { 465 PetscInt k = shared[i][j]; 466 vneighs[k][vcount[k]] = neigh[i]; 467 vcount[k]++; 468 } 469 } 470 for (i=0;i<nv;i++) { 471 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 472 } 473 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 474 475 /* need to remove coarse faces' dofs to ensure the 476 proper detection of coarse edges' endpoints */ 477 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 478 for (i=0;i<ne;i++) { 479 if (ecount[i] > 1 || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 480 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 481 } 482 } 483 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 484 if (!conforming) { 485 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 486 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 487 } 488 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 489 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 490 cum = 0; 491 for (i=0;i<ne;i++) { 492 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 493 if (!PetscBTLookup(btee,i)) { 494 marks[cum++] = i; 495 continue; 496 } 497 /* set badly connected edge dofs as primal */ 498 if (!conforming) { 499 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 500 marks[cum++] = i; 501 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 502 for (j=ii[i];j<ii[i+1];j++) { 503 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 504 } 505 } else { 506 /* every edge dofs should be connected trough a certain number of nodal dofs 507 to other edge dofs belonging to coarse edges 508 - at most 2 endpoints 509 - order-1 interior nodal dofs 510 - no undefined nodal dofs (nconn < order) 511 */ 512 PetscInt ends = 0,ints = 0, undef = 0; 513 for (j=ii[i];j<ii[i+1];j++) { 514 PetscInt v = jj[j],k; 515 PetscInt nconn = iit[v+1]-iit[v]; 516 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 517 if (nconn > order) ends++; 518 else if (nconn == order) ints++; 519 else undef++; 520 } 521 if (undef || ends > 2 || ints != order -1) { 522 marks[cum++] = i; 523 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 524 for (j=ii[i];j<ii[i+1];j++) { 525 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 526 } 527 } 528 } 529 } 530 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 531 if (!order && ii[i+1] != ii[i]) { 532 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 533 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 534 } 535 } 536 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 537 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 538 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 539 if (!conforming) { 540 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 541 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 542 } 543 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 544 545 /* identify splitpoints and corner candidates */ 546 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 547 if (print) { 548 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 549 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 550 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 551 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 552 } 553 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 554 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 555 for (i=0;i<nv;i++) { 556 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 557 PetscBool sneighs = PETSC_TRUE; 558 if (!order) { /* variable order */ 559 PetscReal vorder = 0.; 560 561 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 562 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 563 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 564 ord = 1; 565 } 566 #if defined(PETSC_USE_DEBUG) 567 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 568 #endif 569 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 570 if (vc != ecount[jj[j]]) { 571 sneighs = PETSC_FALSE; 572 } else { 573 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 574 for (k=0;k<vc;k++) { 575 if (vn[k] != en[k]) { 576 sneighs = PETSC_FALSE; 577 break; 578 } 579 } 580 } 581 } 582 if (!sneighs || test >= 3*ord) { /* splitpoints */ 583 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d)\n",i,!sneighs,test >= 3*ord); 584 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 585 } else if (test == ord) { 586 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 587 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 588 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 589 } else { 590 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 591 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 592 } 593 } 594 } 595 ierr = PetscFree(ecount);CHKERRQ(ierr); 596 ierr = PetscFree(vcount);CHKERRQ(ierr); 597 if (ne) { 598 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 599 } 600 if (nv) { 601 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 602 } 603 ierr = PetscFree(eneighs);CHKERRQ(ierr); 604 ierr = PetscFree(vneighs);CHKERRQ(ierr); 605 606 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 607 if (order != 1) { 608 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 609 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 610 for (i=0;i<nv;i++) { 611 if (PetscBTLookup(btvcand,i)) { 612 PetscBool found = PETSC_FALSE; 613 for (j=ii[i];j<ii[i+1] && !found;j++) { 614 PetscInt k,e = jj[j]; 615 if (PetscBTLookup(bte,e)) continue; 616 for (k=iit[e];k<iit[e+1];k++) { 617 PetscInt v = jjt[k]; 618 if (v != i && PetscBTLookup(btvcand,v)) { 619 found = PETSC_TRUE; 620 break; 621 } 622 } 623 } 624 if (!found) { 625 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 626 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 627 } else { 628 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 629 } 630 } 631 } 632 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 633 } 634 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 635 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 636 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 637 638 /* Get the local G^T explicitly */ 639 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 640 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 641 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 642 643 /* Mark interior nodal dofs */ 644 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 645 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 646 for (i=1;i<n_neigh;i++) { 647 for (j=0;j<n_shared[i];j++) { 648 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 649 } 650 } 651 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 652 653 /* communicate corners and splitpoints */ 654 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 655 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 656 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 657 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 658 659 if (print) { 660 IS tbz; 661 662 cum = 0; 663 for (i=0;i<nv;i++) 664 if (sfvleaves[i]) 665 vmarks[cum++] = i; 666 667 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 668 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 669 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 670 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 671 } 672 673 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 674 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 675 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 676 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 677 678 /* Zero rows of lGt corresponding to identified corners 679 and interior nodal dofs */ 680 cum = 0; 681 for (i=0;i<nv;i++) { 682 if (sfvleaves[i]) { 683 vmarks[cum++] = i; 684 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 685 } 686 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 687 } 688 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 689 if (print) { 690 IS tbz; 691 692 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 693 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 694 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 695 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 696 } 697 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 698 ierr = PetscFree(vmarks);CHKERRQ(ierr); 699 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 700 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 701 702 /* Recompute G */ 703 ierr = MatDestroy(&lG);CHKERRQ(ierr); 704 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 705 if (print) { 706 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 707 ierr = MatView(lG,NULL);CHKERRQ(ierr); 708 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 709 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 710 } 711 712 /* Get primal dofs (if any) */ 713 cum = 0; 714 for (i=0;i<ne;i++) { 715 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 716 } 717 if (fl2g) { 718 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 719 } 720 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 721 if (print) { 722 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 723 ierr = ISView(primals,NULL);CHKERRQ(ierr); 724 } 725 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 726 /* TODO: what if the user passed in some of them ? */ 727 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 728 ierr = ISDestroy(&primals);CHKERRQ(ierr); 729 730 /* Compute edge connectivity */ 731 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 732 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 733 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 734 if (fl2g) { 735 PetscBT btf; 736 PetscInt *iia,*jja,*iiu,*jju; 737 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 738 739 /* create CSR for all local dofs */ 740 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 741 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 742 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 743 iiu = pcbddc->mat_graph->xadj; 744 jju = pcbddc->mat_graph->adjncy; 745 } else if (pcbddc->use_local_adj) { 746 rest = PETSC_TRUE; 747 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 748 } else { 749 free = PETSC_TRUE; 750 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 751 iiu[0] = 0; 752 for (i=0;i<n;i++) { 753 iiu[i+1] = i+1; 754 jju[i] = -1; 755 } 756 } 757 758 /* import sizes of CSR */ 759 iia[0] = 0; 760 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 761 762 /* overwrite entries corresponding to the Nedelec field */ 763 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 764 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 765 for (i=0;i<ne;i++) { 766 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 767 iia[idxs[i]+1] = ii[i+1]-ii[i]; 768 } 769 770 /* iia in CSR */ 771 for (i=0;i<n;i++) iia[i+1] += iia[i]; 772 773 /* jja in CSR */ 774 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 775 for (i=0;i<n;i++) 776 if (!PetscBTLookup(btf,i)) 777 for (j=0;j<iiu[i+1]-iiu[i];j++) 778 jja[iia[i]+j] = jju[iiu[i]+j]; 779 780 /* map edge dofs connectivity */ 781 if (jj) { 782 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 783 for (i=0;i<ne;i++) { 784 PetscInt e = idxs[i]; 785 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 786 } 787 } 788 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 789 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 790 if (rest) { 791 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 792 } 793 if (free) { 794 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 795 } 796 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 797 } else { 798 if (jj) { 799 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 800 } 801 } 802 803 /* Analyze interface for edge dofs */ 804 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 805 806 /* Get coarse edges in the edge space */ 807 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 808 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 809 810 if (fl2g) { 811 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 812 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 813 for (i=0;i<nee;i++) { 814 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 815 } 816 } else { 817 eedges = alleedges; 818 primals = allprimals; 819 } 820 821 /* Mark fine edge dofs with their coarse edge id */ 822 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 823 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 824 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 825 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 826 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 827 if (print) { 828 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 829 ierr = ISView(primals,NULL);CHKERRQ(ierr); 830 } 831 832 maxsize = 0; 833 for (i=0;i<nee;i++) { 834 PetscInt size,mark = i+1; 835 836 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 837 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 838 for (j=0;j<size;j++) marks[idxs[j]] = mark; 839 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 840 maxsize = PetscMax(maxsize,size); 841 } 842 843 /* Find coarse edge endpoints */ 844 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 845 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 846 for (i=0;i<nee;i++) { 847 PetscInt mark = i+1,size; 848 849 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 850 if (!size && nedfieldlocal) continue; 851 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 852 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 853 if (print) { 854 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 855 ISView(eedges[i],NULL); 856 } 857 for (j=0;j<size;j++) { 858 PetscInt k, ee = idxs[j]; 859 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 860 for (k=ii[ee];k<ii[ee+1];k++) { 861 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 862 if (PetscBTLookup(btv,jj[k])) { 863 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 864 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 865 PetscInt k2; 866 PetscBool corner = PETSC_FALSE; 867 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 868 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 869 /* it's a corner if either is connected with an edge dof belonging to a different cc or 870 if the edge dof lie on the natural part of the boundary */ 871 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 872 corner = PETSC_TRUE; 873 break; 874 } 875 } 876 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 877 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 878 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 879 } else { 880 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 881 } 882 } 883 } 884 } 885 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 886 } 887 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 888 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 889 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 890 891 /* Reset marked primal dofs */ 892 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 893 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 894 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 895 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 896 897 /* Now use the initial lG */ 898 ierr = MatDestroy(&lG);CHKERRQ(ierr); 899 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 900 lG = lGinit; 901 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 902 903 /* Compute extended cols indices */ 904 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 905 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 906 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 907 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 908 i *= maxsize; 909 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 910 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 911 eerr = PETSC_FALSE; 912 for (i=0;i<nee;i++) { 913 PetscInt size,found = 0; 914 915 cum = 0; 916 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 917 if (!size && nedfieldlocal) continue; 918 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 919 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 920 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 921 for (j=0;j<size;j++) { 922 PetscInt k,ee = idxs[j]; 923 for (k=ii[ee];k<ii[ee+1];k++) { 924 PetscInt vv = jj[k]; 925 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 926 else if (!PetscBTLookupSet(btvc,vv)) found++; 927 } 928 } 929 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 930 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 931 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 932 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 933 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 934 /* it may happen that endpoints are not defined at this point 935 if it is the case, mark this edge for a second pass */ 936 if (cum != size -1 || found != 2) { 937 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 938 if (print) { 939 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 940 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 941 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 942 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 943 } 944 eerr = PETSC_TRUE; 945 } 946 } 947 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 948 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 949 if (done) { 950 PetscInt *newprimals; 951 952 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 953 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 954 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 955 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 956 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 957 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 958 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 959 for (i=0;i<nee;i++) { 960 PetscBool has_candidates = PETSC_FALSE; 961 if (PetscBTLookup(bter,i)) { 962 PetscInt size,mark = i+1; 963 964 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 965 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 966 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 967 for (j=0;j<size;j++) { 968 PetscInt k,ee = idxs[j]; 969 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 970 for (k=ii[ee];k<ii[ee+1];k++) { 971 /* set all candidates located on the edge as corners */ 972 if (PetscBTLookup(btvcand,jj[k])) { 973 PetscInt k2,vv = jj[k]; 974 has_candidates = PETSC_TRUE; 975 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 976 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 977 /* set all edge dofs connected to candidate as primals */ 978 for (k2=iit[vv];k2<iit[vv+1];k2++) { 979 if (marks[jjt[k2]] == mark) { 980 PetscInt k3,ee2 = jjt[k2]; 981 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 982 newprimals[cum++] = ee2; 983 /* finally set the new corners */ 984 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 985 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 986 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 987 } 988 } 989 } 990 } else { 991 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 992 } 993 } 994 } 995 if (!has_candidates) { /* circular edge */ 996 PetscInt k, ee = idxs[0],*tmarks; 997 998 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 999 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1000 for (k=ii[ee];k<ii[ee+1];k++) { 1001 PetscInt k2; 1002 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1003 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1004 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1005 } 1006 for (j=0;j<size;j++) { 1007 if (tmarks[idxs[j]] > 1) { 1008 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1009 newprimals[cum++] = idxs[j]; 1010 } 1011 } 1012 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1013 } 1014 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1015 } 1016 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1017 } 1018 ierr = PetscFree(extcols);CHKERRQ(ierr); 1019 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1020 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1021 if (fl2g) { 1022 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1023 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1024 for (i=0;i<nee;i++) { 1025 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1026 } 1027 ierr = PetscFree(eedges);CHKERRQ(ierr); 1028 } 1029 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1030 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1031 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1032 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1033 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1034 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1035 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1036 if (fl2g) { 1037 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1038 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1039 for (i=0;i<nee;i++) { 1040 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1041 } 1042 } else { 1043 eedges = alleedges; 1044 primals = allprimals; 1045 } 1046 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1047 1048 /* Mark again */ 1049 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1050 for (i=0;i<nee;i++) { 1051 PetscInt size,mark = i+1; 1052 1053 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1054 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1055 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1056 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1057 } 1058 if (print) { 1059 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1060 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1061 } 1062 1063 /* Recompute extended cols */ 1064 eerr = PETSC_FALSE; 1065 for (i=0;i<nee;i++) { 1066 PetscInt size; 1067 1068 cum = 0; 1069 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1070 if (!size && nedfieldlocal) continue; 1071 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1072 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1073 for (j=0;j<size;j++) { 1074 PetscInt k,ee = idxs[j]; 1075 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1076 } 1077 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1078 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1079 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1080 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1081 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1082 if (cum != size -1) { 1083 if (print) { 1084 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1085 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1086 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1087 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1088 } 1089 eerr = PETSC_TRUE; 1090 } 1091 } 1092 } 1093 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1094 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1095 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1096 /* an error should not occur at this point */ 1097 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1098 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1099 1100 /* Check the number of endpoints */ 1101 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1102 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1103 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1104 for (i=0;i<nee;i++) { 1105 PetscInt size, found = 0, gc[2]; 1106 1107 /* init with defaults */ 1108 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1109 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1110 if (!size && nedfieldlocal) continue; 1111 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1112 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1113 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1114 for (j=0;j<size;j++) { 1115 PetscInt k,ee = idxs[j]; 1116 for (k=ii[ee];k<ii[ee+1];k++) { 1117 PetscInt vv = jj[k]; 1118 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1119 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1120 corners[i*2+found++] = vv; 1121 } 1122 } 1123 } 1124 if (found != 2) { 1125 PetscInt e; 1126 if (fl2g) { 1127 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1128 } else { 1129 e = idxs[0]; 1130 } 1131 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1132 } 1133 1134 /* get primal dof index on this coarse edge */ 1135 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1136 if (gc[0] > gc[1]) { 1137 PetscInt swap = corners[2*i]; 1138 corners[2*i] = corners[2*i+1]; 1139 corners[2*i+1] = swap; 1140 } 1141 cedges[i] = idxs[size-1]; 1142 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1143 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1144 } 1145 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1146 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1147 1148 #if defined(PETSC_USE_DEBUG) 1149 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1150 not interfere with neighbouring coarse edges */ 1151 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1152 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1153 for (i=0;i<nv;i++) { 1154 PetscInt emax = 0,eemax = 0; 1155 1156 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1157 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1158 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1159 for (j=1;j<nee+1;j++) { 1160 if (emax < emarks[j]) { 1161 emax = emarks[j]; 1162 eemax = j; 1163 } 1164 } 1165 /* not relevant for edges */ 1166 if (!eemax) continue; 1167 1168 for (j=ii[i];j<ii[i+1];j++) { 1169 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1170 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1171 } 1172 } 1173 } 1174 ierr = PetscFree(emarks);CHKERRQ(ierr); 1175 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1176 #endif 1177 1178 /* Compute extended rows indices for edge blocks of the change of basis */ 1179 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1180 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1181 extmem *= maxsize; 1182 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1183 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1184 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1185 for (i=0;i<nv;i++) { 1186 PetscInt mark = 0,size,start; 1187 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1188 for (j=ii[i];j<ii[i+1];j++) 1189 if (marks[jj[j]] && !mark) 1190 mark = marks[jj[j]]; 1191 1192 /* not relevant */ 1193 if (!mark) continue; 1194 1195 /* import extended row */ 1196 mark--; 1197 start = mark*extmem+extrowcum[mark]; 1198 size = ii[i+1]-ii[i]; 1199 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1200 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1201 extrowcum[mark] += size; 1202 } 1203 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1204 cum = 0; 1205 for (i=0;i<nee;i++) { 1206 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1207 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1208 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1209 cum = PetscMax(cum,size); 1210 } 1211 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1212 ierr = PetscFree(marks);CHKERRQ(ierr); 1213 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1214 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1215 1216 /* Workspace for lapack inner calls and VecSetValues */ 1217 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1218 1219 /* Create change of basis matrix (preallocation can be improved) */ 1220 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1221 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1222 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1223 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1224 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1225 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1226 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1227 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1228 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1229 ierr = MatSetOption(T,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 1230 1231 /* Defaults to identity */ 1232 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1233 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1234 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1235 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1236 1237 /* Create discrete gradient for the coarser level if needed */ 1238 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1239 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1240 if (pcbddc->current_level < pcbddc->max_levels) { 1241 ISLocalToGlobalMapping cel2g,cvl2g; 1242 IS wis,gwis; 1243 PetscInt cnv,cne; 1244 1245 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1246 if (fl2g) { 1247 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1248 } else { 1249 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1250 pcbddc->nedclocal = wis; 1251 } 1252 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1253 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1254 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1255 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1256 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1257 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1258 1259 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1260 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1261 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1262 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1263 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1264 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1265 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1266 1267 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1268 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1269 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1270 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1271 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1272 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1273 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1274 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1275 } 1276 1277 #if defined(PRINT_GDET) 1278 inc = 0; 1279 lev = pcbddc->current_level; 1280 #endif 1281 for (i=0;i<nee;i++) { 1282 Mat Gins = NULL, GKins = NULL; 1283 IS cornersis = NULL; 1284 PetscScalar cvals[2]; 1285 1286 if (pcbddc->nedcG) { 1287 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1288 } 1289 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1290 if (Gins && GKins) { 1291 PetscScalar *data; 1292 const PetscInt *rows,*cols; 1293 PetscInt nrh,nch,nrc,ncc; 1294 1295 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1296 /* H1 */ 1297 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1298 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1299 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1300 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1301 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1302 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1303 /* complement */ 1304 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1305 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1306 if (ncc + nch != nrc) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d",ncc,nch,nrc); 1307 if (ncc != 1 && pcbddc->nedcG) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the dicrete gradient for the next level with ncc %d",ncc); 1308 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1309 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1310 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1311 1312 /* coarse discrete gradient */ 1313 if (pcbddc->nedcG) { 1314 PetscInt cols[2]; 1315 1316 cols[0] = 2*i; 1317 cols[1] = 2*i+1; 1318 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1319 } 1320 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1321 } 1322 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1323 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1324 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1325 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1326 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1327 } 1328 1329 /* Start assembling */ 1330 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1331 if (pcbddc->nedcG) { 1332 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1333 } 1334 1335 /* Free */ 1336 if (fl2g) { 1337 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1338 for (i=0;i<nee;i++) { 1339 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1340 } 1341 ierr = PetscFree(eedges);CHKERRQ(ierr); 1342 } 1343 1344 /* hack mat_graph with primal dofs on the coarse edges */ 1345 { 1346 PCBDDCGraph graph = pcbddc->mat_graph; 1347 PetscInt *oqueue = graph->queue; 1348 PetscInt *ocptr = graph->cptr; 1349 PetscInt ncc,*idxs; 1350 1351 /* find first primal edge */ 1352 if (pcbddc->nedclocal) { 1353 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1354 } else { 1355 if (fl2g) { 1356 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1357 } 1358 idxs = cedges; 1359 } 1360 cum = 0; 1361 while (cum < nee && cedges[cum] < 0) cum++; 1362 1363 /* adapt connected components */ 1364 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1365 graph->cptr[0] = 0; 1366 for (i=0,ncc=0;i<graph->ncc;i++) { 1367 PetscInt lc = ocptr[i+1]-ocptr[i]; 1368 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1369 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1370 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1371 ncc++; 1372 lc--; 1373 cum++; 1374 while (cum < nee && cedges[cum] < 0) cum++; 1375 } 1376 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1377 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1378 ncc++; 1379 } 1380 graph->ncc = ncc; 1381 if (pcbddc->nedclocal) { 1382 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1383 } 1384 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1385 } 1386 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1387 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1388 1389 1390 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1391 ierr = PetscFree(extrow);CHKERRQ(ierr); 1392 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1393 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1394 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1395 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1396 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1397 ierr = PetscFree(corners);CHKERRQ(ierr); 1398 ierr = PetscFree(cedges);CHKERRQ(ierr); 1399 ierr = PetscFree(extrows);CHKERRQ(ierr); 1400 ierr = PetscFree(extcols);CHKERRQ(ierr); 1401 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1402 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1403 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1404 1405 /* Complete assembling */ 1406 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1407 if (pcbddc->nedcG) { 1408 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1409 #if 0 1410 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1411 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1412 #endif 1413 } 1414 1415 /* set change of basis */ 1416 ierr = PCBDDCSetChangeOfBasisMat(pc,T,PETSC_FALSE);CHKERRQ(ierr); 1417 #if 0 1418 if (pcbddc->current_level) { 1419 PetscViewer viewer; 1420 char filename[256]; 1421 Mat Tned; 1422 IS sub; 1423 PetscInt rst; 1424 1425 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 1426 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 1427 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 1428 if (nedfieldlocal) { 1429 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 1430 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 1431 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 1432 } else { 1433 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 1434 } 1435 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 1436 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 1437 ierr = MatGetOwnershipRange(pc->pmat,&rst,NULL);CHKERRQ(ierr); 1438 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 1439 if (matis->sf_rootdata[i]) { 1440 matis->sf_rootdata[cum++] = i + rst; 1441 } 1442 } 1443 PetscPrintf(PETSC_COMM_SELF,"[%D] LEVEL %d MY ne %d cum %d\n",PetscGlobalRank,pcbddc->current_level,ne,cum); 1444 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cum,matis->sf_rootdata,PETSC_USE_POINTER,&sub);CHKERRQ(ierr); 1445 ierr = MatGetSubMatrix(T,sub,sub,MAT_INITIAL_MATRIX,&Tned);CHKERRQ(ierr); 1446 ierr = ISDestroy(&sub);CHKERRQ(ierr); 1447 1448 sprintf(filename,"Change_l%d.m",pcbddc->current_level); 1449 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)Tned),filename,&viewer);CHKERRQ(ierr); 1450 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1451 ierr = PetscObjectSetName((PetscObject)Tned,"T");CHKERRQ(ierr); 1452 ierr = MatView(Tned,viewer);CHKERRQ(ierr); 1453 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1454 ierr = MatDestroy(&Tned);CHKERRQ(ierr); 1455 } 1456 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1457 #endif 1458 ierr = MatDestroy(&T);CHKERRQ(ierr); 1459 1460 PetscFunctionReturn(0); 1461 } 1462 1463 /* the near-null space of BDDC carries information on quadrature weights, 1464 and these can be collinear -> so cheat with MatNullSpaceCreate 1465 and create a suitable set of basis vectors first */ 1466 #undef __FUNCT__ 1467 #define __FUNCT__ "PCBDDCNullSpaceCreate" 1468 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1469 { 1470 PetscErrorCode ierr; 1471 PetscInt i; 1472 1473 PetscFunctionBegin; 1474 for (i=0;i<nvecs;i++) { 1475 PetscInt first,last; 1476 1477 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1478 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1479 if (i>=first && i < last) { 1480 PetscScalar *data; 1481 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1482 if (!has_const) { 1483 data[i-first] = 1.; 1484 } else { 1485 data[2*i-first] = 1./PetscSqrtReal(2.); 1486 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1487 } 1488 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1489 } 1490 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1491 } 1492 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1493 for (i=0;i<nvecs;i++) { /* reset vectors */ 1494 PetscInt first,last; 1495 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1496 if (i>=first && i < last) { 1497 PetscScalar *data; 1498 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1499 if (!has_const) { 1500 data[i-first] = 0.; 1501 } else { 1502 data[2*i-first] = 0.; 1503 data[2*i-first+1] = 0.; 1504 } 1505 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1506 } 1507 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1508 } 1509 PetscFunctionReturn(0); 1510 } 1511 1512 #undef __FUNCT__ 1513 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 1514 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1515 { 1516 Mat loc_divudotp; 1517 Vec p,v,vins,quad_vec,*quad_vecs; 1518 ISLocalToGlobalMapping map; 1519 IS *faces,*edges; 1520 PetscScalar *vals; 1521 const PetscScalar *array; 1522 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1523 PetscMPIInt rank; 1524 PetscErrorCode ierr; 1525 1526 PetscFunctionBegin; 1527 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1528 if (graph->twodim) { 1529 lmaxneighs = 2; 1530 } else { 1531 lmaxneighs = 1; 1532 for (i=0;i<ne;i++) { 1533 const PetscInt *idxs; 1534 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1535 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1536 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1537 } 1538 lmaxneighs++; /* graph count does not include self */ 1539 } 1540 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1541 maxsize = 0; 1542 for (i=0;i<ne;i++) { 1543 PetscInt nn; 1544 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1545 maxsize = PetscMax(maxsize,nn); 1546 } 1547 for (i=0;i<nf;i++) { 1548 PetscInt nn; 1549 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1550 maxsize = PetscMax(maxsize,nn); 1551 } 1552 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1553 /* create vectors to hold quadrature weights */ 1554 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1555 if (!transpose) { 1556 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1557 } else { 1558 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1559 } 1560 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1561 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1562 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1563 for (i=0;i<maxneighs;i++) { 1564 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1565 } 1566 1567 /* compute local quad vec */ 1568 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1569 if (!transpose) { 1570 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1571 } else { 1572 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1573 } 1574 ierr = VecSet(p,1.);CHKERRQ(ierr); 1575 if (!transpose) { 1576 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1577 } else { 1578 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1579 } 1580 if (vl2l) { 1581 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1582 } else { 1583 vins = v; 1584 } 1585 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1586 ierr = VecDestroy(&p);CHKERRQ(ierr); 1587 1588 /* insert in global quadrature vecs */ 1589 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1590 for (i=0;i<nf;i++) { 1591 const PetscInt *idxs; 1592 PetscInt idx,nn,j; 1593 1594 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1595 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1596 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1597 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1598 idx = -(idx+1); 1599 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1600 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1601 } 1602 for (i=0;i<ne;i++) { 1603 const PetscInt *idxs; 1604 PetscInt idx,nn,j; 1605 1606 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1607 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1608 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1609 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1610 idx = -(idx+1); 1611 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1612 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1613 } 1614 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1615 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1616 if (vl2l) { 1617 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1618 } 1619 ierr = VecDestroy(&v);CHKERRQ(ierr); 1620 ierr = PetscFree(vals);CHKERRQ(ierr); 1621 1622 /* assemble near null space */ 1623 for (i=0;i<maxneighs;i++) { 1624 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1625 } 1626 for (i=0;i<maxneighs;i++) { 1627 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1628 } 1629 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1630 PetscFunctionReturn(0); 1631 } 1632 1633 1634 #undef __FUNCT__ 1635 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 1636 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1637 { 1638 PetscErrorCode ierr; 1639 Vec local,global; 1640 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1641 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1642 1643 PetscFunctionBegin; 1644 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1645 /* need to convert from global to local topology information and remove references to information in global ordering */ 1646 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1647 if (pcbddc->user_provided_isfordofs) { 1648 if (pcbddc->n_ISForDofs) { 1649 PetscInt i; 1650 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1651 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1652 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1653 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1654 } 1655 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1656 pcbddc->n_ISForDofs = 0; 1657 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1658 } 1659 } else { 1660 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1661 PetscInt i, n = matis->A->rmap->n; 1662 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1663 if (i > 1) { 1664 pcbddc->n_ISForDofsLocal = i; 1665 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1666 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1667 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1668 } 1669 } 1670 } 1671 } 1672 1673 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1674 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1675 } 1676 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1677 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1678 } 1679 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1680 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1681 } 1682 ierr = VecDestroy(&global);CHKERRQ(ierr); 1683 ierr = VecDestroy(&local);CHKERRQ(ierr); 1684 PetscFunctionReturn(0); 1685 } 1686 1687 #undef __FUNCT__ 1688 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 1689 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1690 { 1691 PC_IS *pcis = (PC_IS*)(pc->data); 1692 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1693 PetscErrorCode ierr; 1694 1695 PetscFunctionBegin; 1696 if (!pcbddc->benign_have_null) { 1697 PetscFunctionReturn(0); 1698 } 1699 if (pcbddc->ChangeOfBasisMatrix) { 1700 Vec swap; 1701 1702 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1703 swap = pcbddc->work_change; 1704 pcbddc->work_change = r; 1705 r = swap; 1706 } 1707 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1708 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1709 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1710 ierr = VecSet(z,0.);CHKERRQ(ierr); 1711 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1712 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1713 if (pcbddc->ChangeOfBasisMatrix) { 1714 pcbddc->work_change = r; 1715 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1716 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1717 } 1718 PetscFunctionReturn(0); 1719 } 1720 1721 #undef __FUNCT__ 1722 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 1723 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1724 { 1725 PCBDDCBenignMatMult_ctx ctx; 1726 PetscErrorCode ierr; 1727 PetscBool apply_right,apply_left,reset_x; 1728 1729 PetscFunctionBegin; 1730 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1731 if (transpose) { 1732 apply_right = ctx->apply_left; 1733 apply_left = ctx->apply_right; 1734 } else { 1735 apply_right = ctx->apply_right; 1736 apply_left = ctx->apply_left; 1737 } 1738 reset_x = PETSC_FALSE; 1739 if (apply_right) { 1740 const PetscScalar *ax; 1741 PetscInt nl,i; 1742 1743 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1744 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1745 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1746 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1747 for (i=0;i<ctx->benign_n;i++) { 1748 PetscScalar sum,val; 1749 const PetscInt *idxs; 1750 PetscInt nz,j; 1751 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1752 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1753 sum = 0.; 1754 if (ctx->apply_p0) { 1755 val = ctx->work[idxs[nz-1]]; 1756 for (j=0;j<nz-1;j++) { 1757 sum += ctx->work[idxs[j]]; 1758 ctx->work[idxs[j]] += val; 1759 } 1760 } else { 1761 for (j=0;j<nz-1;j++) { 1762 sum += ctx->work[idxs[j]]; 1763 } 1764 } 1765 ctx->work[idxs[nz-1]] -= sum; 1766 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1767 } 1768 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1769 reset_x = PETSC_TRUE; 1770 } 1771 if (transpose) { 1772 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1773 } else { 1774 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1775 } 1776 if (reset_x) { 1777 ierr = VecResetArray(x);CHKERRQ(ierr); 1778 } 1779 if (apply_left) { 1780 PetscScalar *ay; 1781 PetscInt i; 1782 1783 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1784 for (i=0;i<ctx->benign_n;i++) { 1785 PetscScalar sum,val; 1786 const PetscInt *idxs; 1787 PetscInt nz,j; 1788 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1789 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1790 val = -ay[idxs[nz-1]]; 1791 if (ctx->apply_p0) { 1792 sum = 0.; 1793 for (j=0;j<nz-1;j++) { 1794 sum += ay[idxs[j]]; 1795 ay[idxs[j]] += val; 1796 } 1797 ay[idxs[nz-1]] += sum; 1798 } else { 1799 for (j=0;j<nz-1;j++) { 1800 ay[idxs[j]] += val; 1801 } 1802 ay[idxs[nz-1]] = 0.; 1803 } 1804 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1805 } 1806 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1807 } 1808 PetscFunctionReturn(0); 1809 } 1810 1811 #undef __FUNCT__ 1812 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 1813 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1814 { 1815 PetscErrorCode ierr; 1816 1817 PetscFunctionBegin; 1818 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1819 PetscFunctionReturn(0); 1820 } 1821 1822 #undef __FUNCT__ 1823 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 1824 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1825 { 1826 PetscErrorCode ierr; 1827 1828 PetscFunctionBegin; 1829 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1830 PetscFunctionReturn(0); 1831 } 1832 1833 #undef __FUNCT__ 1834 #define __FUNCT__ "PCBDDCBenignShellMat" 1835 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1836 { 1837 PC_IS *pcis = (PC_IS*)pc->data; 1838 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1839 PCBDDCBenignMatMult_ctx ctx; 1840 PetscErrorCode ierr; 1841 1842 PetscFunctionBegin; 1843 if (!restore) { 1844 Mat A_IB,A_BI; 1845 PetscScalar *work; 1846 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1847 1848 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1849 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1850 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1851 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1852 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1853 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1854 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1855 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1856 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1857 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1858 ctx->apply_left = PETSC_TRUE; 1859 ctx->apply_right = PETSC_FALSE; 1860 ctx->apply_p0 = PETSC_FALSE; 1861 ctx->benign_n = pcbddc->benign_n; 1862 if (reuse) { 1863 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1864 ctx->free = PETSC_FALSE; 1865 } else { /* TODO: could be optimized for successive solves */ 1866 ISLocalToGlobalMapping N_to_D; 1867 PetscInt i; 1868 1869 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1870 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1871 for (i=0;i<pcbddc->benign_n;i++) { 1872 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1873 } 1874 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1875 ctx->free = PETSC_TRUE; 1876 } 1877 ctx->A = pcis->A_IB; 1878 ctx->work = work; 1879 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1880 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1881 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1882 pcis->A_IB = A_IB; 1883 1884 /* A_BI as A_IB^T */ 1885 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1886 pcbddc->benign_original_mat = pcis->A_BI; 1887 pcis->A_BI = A_BI; 1888 } else { 1889 if (!pcbddc->benign_original_mat) { 1890 PetscFunctionReturn(0); 1891 } 1892 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1893 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1894 pcis->A_IB = ctx->A; 1895 ctx->A = NULL; 1896 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1897 pcis->A_BI = pcbddc->benign_original_mat; 1898 pcbddc->benign_original_mat = NULL; 1899 if (ctx->free) { 1900 PetscInt i; 1901 for (i=0;i<ctx->benign_n;i++) { 1902 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1903 } 1904 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1905 } 1906 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1907 ierr = PetscFree(ctx);CHKERRQ(ierr); 1908 } 1909 PetscFunctionReturn(0); 1910 } 1911 1912 /* used just in bddc debug mode */ 1913 #undef __FUNCT__ 1914 #define __FUNCT__ "PCBDDCBenignProject" 1915 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1916 { 1917 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1918 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1919 Mat An; 1920 PetscErrorCode ierr; 1921 1922 PetscFunctionBegin; 1923 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1924 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1925 if (is1) { 1926 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1927 ierr = MatDestroy(&An);CHKERRQ(ierr); 1928 } else { 1929 *B = An; 1930 } 1931 PetscFunctionReturn(0); 1932 } 1933 1934 /* TODO: add reuse flag */ 1935 #undef __FUNCT__ 1936 #define __FUNCT__ "MatSeqAIJCompress" 1937 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1938 { 1939 Mat Bt; 1940 PetscScalar *a,*bdata; 1941 const PetscInt *ii,*ij; 1942 PetscInt m,n,i,nnz,*bii,*bij; 1943 PetscBool flg_row; 1944 PetscErrorCode ierr; 1945 1946 PetscFunctionBegin; 1947 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 1948 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1949 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 1950 nnz = n; 1951 for (i=0;i<ii[n];i++) { 1952 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 1953 } 1954 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 1955 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 1956 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 1957 nnz = 0; 1958 bii[0] = 0; 1959 for (i=0;i<n;i++) { 1960 PetscInt j; 1961 for (j=ii[i];j<ii[i+1];j++) { 1962 PetscScalar entry = a[j]; 1963 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 1964 bij[nnz] = ij[j]; 1965 bdata[nnz] = entry; 1966 nnz++; 1967 } 1968 } 1969 bii[i+1] = nnz; 1970 } 1971 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 1972 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 1973 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1974 { 1975 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 1976 b->free_a = PETSC_TRUE; 1977 b->free_ij = PETSC_TRUE; 1978 } 1979 *B = Bt; 1980 PetscFunctionReturn(0); 1981 } 1982 1983 #undef __FUNCT__ 1984 #define __FUNCT__ "MatDetectDisconnectedComponents" 1985 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 1986 { 1987 Mat B; 1988 IS is_dummy,*cc_n; 1989 ISLocalToGlobalMapping l2gmap_dummy; 1990 PCBDDCGraph graph; 1991 PetscInt i,n; 1992 PetscInt *xadj,*adjncy; 1993 PetscInt *xadj_filtered,*adjncy_filtered; 1994 PetscBool flg_row,isseqaij; 1995 PetscErrorCode ierr; 1996 1997 PetscFunctionBegin; 1998 if (!A->rmap->N || !A->cmap->N) { 1999 *ncc = 0; 2000 *cc = NULL; 2001 PetscFunctionReturn(0); 2002 } 2003 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2004 if (!isseqaij && filter) { 2005 PetscBool isseqdense; 2006 2007 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2008 if (!isseqdense) { 2009 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2010 } else { /* TODO: rectangular case and LDA */ 2011 PetscScalar *array; 2012 PetscReal chop=1.e-6; 2013 2014 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2015 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2016 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2017 for (i=0;i<n;i++) { 2018 PetscInt j; 2019 for (j=i+1;j<n;j++) { 2020 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2021 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2022 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2023 } 2024 } 2025 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2026 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2027 } 2028 } else { 2029 B = A; 2030 } 2031 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2032 2033 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2034 if (filter) { 2035 PetscScalar *data; 2036 PetscInt j,cum; 2037 2038 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2039 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2040 cum = 0; 2041 for (i=0;i<n;i++) { 2042 PetscInt t; 2043 2044 for (j=xadj[i];j<xadj[i+1];j++) { 2045 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2046 continue; 2047 } 2048 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2049 } 2050 t = xadj_filtered[i]; 2051 xadj_filtered[i] = cum; 2052 cum += t; 2053 } 2054 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2055 } else { 2056 xadj_filtered = NULL; 2057 adjncy_filtered = NULL; 2058 } 2059 2060 /* compute local connected components using PCBDDCGraph */ 2061 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2062 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2063 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2064 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2065 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2066 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2067 if (xadj_filtered) { 2068 graph->xadj = xadj_filtered; 2069 graph->adjncy = adjncy_filtered; 2070 } else { 2071 graph->xadj = xadj; 2072 graph->adjncy = adjncy; 2073 } 2074 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2075 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2076 /* partial clean up */ 2077 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2078 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2079 if (A != B) { 2080 ierr = MatDestroy(&B);CHKERRQ(ierr); 2081 } 2082 2083 /* get back data */ 2084 if (ncc) *ncc = graph->ncc; 2085 if (cc) { 2086 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2087 for (i=0;i<graph->ncc;i++) { 2088 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2089 } 2090 *cc = cc_n; 2091 } 2092 /* clean up graph */ 2093 graph->xadj = 0; 2094 graph->adjncy = 0; 2095 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2096 PetscFunctionReturn(0); 2097 } 2098 2099 #undef __FUNCT__ 2100 #define __FUNCT__ "PCBDDCBenignCheck" 2101 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2102 { 2103 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2104 PC_IS* pcis = (PC_IS*)(pc->data); 2105 IS dirIS = NULL; 2106 PetscInt i; 2107 PetscErrorCode ierr; 2108 2109 PetscFunctionBegin; 2110 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2111 if (zerodiag) { 2112 Mat A; 2113 Vec vec3_N; 2114 PetscScalar *vals; 2115 const PetscInt *idxs; 2116 PetscInt nz,*count; 2117 2118 /* p0 */ 2119 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2120 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2121 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2122 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2123 for (i=0;i<nz;i++) vals[i] = 1.; 2124 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2125 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2126 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2127 /* v_I */ 2128 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2129 for (i=0;i<nz;i++) vals[i] = 0.; 2130 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2131 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2132 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2133 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2134 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2135 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2136 if (dirIS) { 2137 PetscInt n; 2138 2139 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2140 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2141 for (i=0;i<n;i++) vals[i] = 0.; 2142 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2143 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2144 } 2145 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2146 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2147 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2148 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2149 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2150 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2151 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2152 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2153 ierr = PetscFree(vals);CHKERRQ(ierr); 2154 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2155 2156 /* there should not be any pressure dofs lying on the interface */ 2157 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2158 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2159 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2160 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2161 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2162 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2163 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2164 ierr = PetscFree(count);CHKERRQ(ierr); 2165 } 2166 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2167 2168 /* check PCBDDCBenignGetOrSetP0 */ 2169 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2170 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2171 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2172 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2173 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2174 for (i=0;i<pcbddc->benign_n;i++) { 2175 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2176 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 2177 } 2178 PetscFunctionReturn(0); 2179 } 2180 2181 #undef __FUNCT__ 2182 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 2183 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2184 { 2185 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2186 IS pressures,zerodiag,*zerodiag_subs; 2187 PetscInt nz,n; 2188 PetscInt *interior_dofs,n_interior_dofs; 2189 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 2190 PetscErrorCode ierr; 2191 2192 PetscFunctionBegin; 2193 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2194 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2195 for (n=0;n<pcbddc->benign_n;n++) { 2196 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2197 } 2198 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2199 pcbddc->benign_n = 0; 2200 /* if a local info on dofs is present, assumes that the last field represents "pressures" 2201 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2202 Checks if all the pressure dofs in each subdomain have a zero diagonal 2203 If not, a change of basis on pressures is not needed 2204 since the local Schur complements are already SPD 2205 */ 2206 has_null_pressures = PETSC_TRUE; 2207 have_null = PETSC_TRUE; 2208 if (pcbddc->n_ISForDofsLocal) { 2209 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2210 2211 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2212 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2213 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2214 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2215 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2216 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2217 if (!sorted) { 2218 ierr = ISSort(pressures);CHKERRQ(ierr); 2219 } 2220 } else { 2221 pressures = NULL; 2222 } 2223 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2224 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2225 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2226 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2227 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2228 if (!sorted) { 2229 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2230 } 2231 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2232 if (!nz) { 2233 if (n) have_null = PETSC_FALSE; 2234 has_null_pressures = PETSC_FALSE; 2235 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2236 } 2237 recompute_zerodiag = PETSC_FALSE; 2238 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2239 zerodiag_subs = NULL; 2240 pcbddc->benign_n = 0; 2241 n_interior_dofs = 0; 2242 interior_dofs = NULL; 2243 if (pcbddc->current_level) { /* need to compute interior nodes */ 2244 PetscInt n,i,j; 2245 PetscInt n_neigh,*neigh,*n_shared,**shared; 2246 PetscInt *iwork; 2247 2248 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2249 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2250 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2251 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2252 for (i=1;i<n_neigh;i++) 2253 for (j=0;j<n_shared[i];j++) 2254 iwork[shared[i][j]] += 1; 2255 for (i=0;i<n;i++) 2256 if (!iwork[i]) 2257 interior_dofs[n_interior_dofs++] = i; 2258 ierr = PetscFree(iwork);CHKERRQ(ierr); 2259 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2260 } 2261 if (has_null_pressures) { 2262 IS *subs; 2263 PetscInt nsubs,i,j,nl; 2264 const PetscInt *idxs; 2265 PetscScalar *array; 2266 Vec *work; 2267 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2268 2269 subs = pcbddc->local_subs; 2270 nsubs = pcbddc->n_local_subs; 2271 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2272 if (pcbddc->current_level) { 2273 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2274 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2275 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2276 /* work[0] = 1_p */ 2277 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2278 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2279 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2280 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2281 /* work[0] = 1_v */ 2282 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2283 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2284 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2285 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2286 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2287 } 2288 if (nsubs > 1) { 2289 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2290 for (i=0;i<nsubs;i++) { 2291 ISLocalToGlobalMapping l2g; 2292 IS t_zerodiag_subs; 2293 PetscInt nl; 2294 2295 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2296 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2297 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2298 if (nl) { 2299 PetscBool valid = PETSC_TRUE; 2300 2301 if (pcbddc->current_level) { 2302 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2303 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2304 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2305 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2306 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2307 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2308 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2309 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2310 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2311 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2312 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2313 for (j=0;j<n_interior_dofs;j++) { 2314 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2315 valid = PETSC_FALSE; 2316 break; 2317 } 2318 } 2319 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2320 } 2321 if (valid && pcbddc->NeumannBoundariesLocal) { 2322 IS t_bc; 2323 PetscInt nzb; 2324 2325 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 2326 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 2327 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 2328 if (nzb) valid = PETSC_FALSE; 2329 } 2330 if (valid && pressures) { 2331 IS t_pressure_subs; 2332 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2333 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2334 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2335 } 2336 if (valid) { 2337 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2338 pcbddc->benign_n++; 2339 } else { 2340 recompute_zerodiag = PETSC_TRUE; 2341 } 2342 } 2343 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2344 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2345 } 2346 } else { /* there's just one subdomain (or zero if they have not been detected */ 2347 PetscBool valid = PETSC_TRUE; 2348 2349 if (pcbddc->NeumannBoundariesLocal) { 2350 PetscInt nzb; 2351 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 2352 if (nzb) valid = PETSC_FALSE; 2353 } 2354 if (valid && pressures) { 2355 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2356 } 2357 if (valid && pcbddc->current_level) { 2358 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2359 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2360 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2361 for (j=0;j<n_interior_dofs;j++) { 2362 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2363 valid = PETSC_FALSE; 2364 break; 2365 } 2366 } 2367 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2368 } 2369 if (valid) { 2370 pcbddc->benign_n = 1; 2371 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2372 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2373 zerodiag_subs[0] = zerodiag; 2374 } 2375 } 2376 if (pcbddc->current_level) { 2377 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2378 } 2379 } 2380 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2381 2382 if (!pcbddc->benign_n) { 2383 PetscInt n; 2384 2385 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2386 recompute_zerodiag = PETSC_FALSE; 2387 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2388 if (n) { 2389 has_null_pressures = PETSC_FALSE; 2390 have_null = PETSC_FALSE; 2391 } 2392 } 2393 2394 /* final check for null pressures */ 2395 if (zerodiag && pressures) { 2396 PetscInt nz,np; 2397 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2398 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2399 if (nz != np) have_null = PETSC_FALSE; 2400 } 2401 2402 if (recompute_zerodiag) { 2403 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2404 if (pcbddc->benign_n == 1) { 2405 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2406 zerodiag = zerodiag_subs[0]; 2407 } else { 2408 PetscInt i,nzn,*new_idxs; 2409 2410 nzn = 0; 2411 for (i=0;i<pcbddc->benign_n;i++) { 2412 PetscInt ns; 2413 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2414 nzn += ns; 2415 } 2416 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2417 nzn = 0; 2418 for (i=0;i<pcbddc->benign_n;i++) { 2419 PetscInt ns,*idxs; 2420 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2421 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2422 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2423 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2424 nzn += ns; 2425 } 2426 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2427 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2428 } 2429 have_null = PETSC_FALSE; 2430 } 2431 2432 /* Prepare matrix to compute no-net-flux */ 2433 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2434 Mat A,loc_divudotp; 2435 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2436 IS row,col,isused = NULL; 2437 PetscInt M,N,n,st,n_isused; 2438 2439 if (pressures) { 2440 isused = pressures; 2441 } else { 2442 isused = zerodiag; 2443 } 2444 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2445 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2446 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2447 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2448 n_isused = 0; 2449 if (isused) { 2450 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2451 } 2452 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2453 st = st-n_isused; 2454 if (n) { 2455 const PetscInt *gidxs; 2456 2457 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2458 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2459 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2460 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2461 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2462 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2463 } else { 2464 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2465 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2466 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2467 } 2468 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2469 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2470 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2471 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2472 ierr = ISDestroy(&row);CHKERRQ(ierr); 2473 ierr = ISDestroy(&col);CHKERRQ(ierr); 2474 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2475 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2476 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2477 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2478 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2479 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2480 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2481 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2482 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2483 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2484 } 2485 2486 /* change of basis and p0 dofs */ 2487 if (has_null_pressures) { 2488 IS zerodiagc; 2489 const PetscInt *idxs,*idxsc; 2490 PetscInt i,s,*nnz; 2491 2492 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2493 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2494 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2495 /* local change of basis for pressures */ 2496 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2497 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2498 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2499 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2500 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2501 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2502 for (i=0;i<pcbddc->benign_n;i++) { 2503 PetscInt nzs,j; 2504 2505 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2506 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2507 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2508 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2509 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2510 } 2511 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2512 ierr = PetscFree(nnz);CHKERRQ(ierr); 2513 /* set identity on velocities */ 2514 for (i=0;i<n-nz;i++) { 2515 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2516 } 2517 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2518 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2519 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2520 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2521 /* set change on pressures */ 2522 for (s=0;s<pcbddc->benign_n;s++) { 2523 PetscScalar *array; 2524 PetscInt nzs; 2525 2526 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2527 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2528 for (i=0;i<nzs-1;i++) { 2529 PetscScalar vals[2]; 2530 PetscInt cols[2]; 2531 2532 cols[0] = idxs[i]; 2533 cols[1] = idxs[nzs-1]; 2534 vals[0] = 1.; 2535 vals[1] = 1.; 2536 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2537 } 2538 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2539 for (i=0;i<nzs-1;i++) array[i] = -1.; 2540 array[nzs-1] = 1.; 2541 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2542 /* store local idxs for p0 */ 2543 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2544 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2545 ierr = PetscFree(array);CHKERRQ(ierr); 2546 } 2547 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2548 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2549 /* project if needed */ 2550 if (pcbddc->benign_change_explicit) { 2551 Mat M; 2552 2553 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2554 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2555 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2556 ierr = MatDestroy(&M);CHKERRQ(ierr); 2557 } 2558 /* store global idxs for p0 */ 2559 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2560 } 2561 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2562 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2563 2564 /* determines if the coarse solver will be singular or not */ 2565 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2566 /* determines if the problem has subdomains with 0 pressure block */ 2567 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2568 *zerodiaglocal = zerodiag; 2569 PetscFunctionReturn(0); 2570 } 2571 2572 #undef __FUNCT__ 2573 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 2574 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2575 { 2576 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2577 PetscScalar *array; 2578 PetscErrorCode ierr; 2579 2580 PetscFunctionBegin; 2581 if (!pcbddc->benign_sf) { 2582 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2583 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2584 } 2585 if (get) { 2586 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2587 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2588 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2589 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2590 } else { 2591 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2592 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2593 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2594 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2595 } 2596 PetscFunctionReturn(0); 2597 } 2598 2599 #undef __FUNCT__ 2600 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 2601 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2602 { 2603 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2604 PetscErrorCode ierr; 2605 2606 PetscFunctionBegin; 2607 /* TODO: add error checking 2608 - avoid nested pop (or push) calls. 2609 - cannot push before pop. 2610 - cannot call this if pcbddc->local_mat is NULL 2611 */ 2612 if (!pcbddc->benign_n) { 2613 PetscFunctionReturn(0); 2614 } 2615 if (pop) { 2616 if (pcbddc->benign_change_explicit) { 2617 IS is_p0; 2618 MatReuse reuse; 2619 2620 /* extract B_0 */ 2621 reuse = MAT_INITIAL_MATRIX; 2622 if (pcbddc->benign_B0) { 2623 reuse = MAT_REUSE_MATRIX; 2624 } 2625 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2626 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2627 /* remove rows and cols from local problem */ 2628 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2629 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2630 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2631 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2632 } else { 2633 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2634 PetscScalar *vals; 2635 PetscInt i,n,*idxs_ins; 2636 2637 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2638 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2639 if (!pcbddc->benign_B0) { 2640 PetscInt *nnz; 2641 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2642 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2643 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2644 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2645 for (i=0;i<pcbddc->benign_n;i++) { 2646 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2647 nnz[i] = n - nnz[i]; 2648 } 2649 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2650 ierr = PetscFree(nnz);CHKERRQ(ierr); 2651 } 2652 2653 for (i=0;i<pcbddc->benign_n;i++) { 2654 PetscScalar *array; 2655 PetscInt *idxs,j,nz,cum; 2656 2657 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2658 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2659 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2660 for (j=0;j<nz;j++) vals[j] = 1.; 2661 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2662 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2663 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2664 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2665 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2666 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2667 cum = 0; 2668 for (j=0;j<n;j++) { 2669 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2670 vals[cum] = array[j]; 2671 idxs_ins[cum] = j; 2672 cum++; 2673 } 2674 } 2675 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2676 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2677 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2678 } 2679 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2680 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2681 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2682 } 2683 } else { /* push */ 2684 if (pcbddc->benign_change_explicit) { 2685 PetscInt i; 2686 2687 for (i=0;i<pcbddc->benign_n;i++) { 2688 PetscScalar *B0_vals; 2689 PetscInt *B0_cols,B0_ncol; 2690 2691 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2692 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2693 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2694 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2695 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2696 } 2697 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2698 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2699 } else { 2700 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2701 } 2702 } 2703 PetscFunctionReturn(0); 2704 } 2705 2706 #undef __FUNCT__ 2707 #define __FUNCT__ "PCBDDCAdaptiveSelection" 2708 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2709 { 2710 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2711 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2712 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2713 PetscBLASInt *B_iwork,*B_ifail; 2714 PetscScalar *work,lwork; 2715 PetscScalar *St,*S,*eigv; 2716 PetscScalar *Sarray,*Starray; 2717 PetscReal *eigs,thresh; 2718 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2719 PetscBool allocated_S_St; 2720 #if defined(PETSC_USE_COMPLEX) 2721 PetscReal *rwork; 2722 #endif 2723 PetscErrorCode ierr; 2724 2725 PetscFunctionBegin; 2726 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2727 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2728 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2729 2730 if (pcbddc->dbg_flag) { 2731 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2732 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2733 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2734 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2735 } 2736 2737 if (pcbddc->dbg_flag) { 2738 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2739 } 2740 2741 /* max size of subsets */ 2742 mss = 0; 2743 for (i=0;i<sub_schurs->n_subs;i++) { 2744 PetscInt subset_size; 2745 2746 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2747 mss = PetscMax(mss,subset_size); 2748 } 2749 2750 /* min/max and threshold */ 2751 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2752 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2753 nmax = PetscMax(nmin,nmax); 2754 allocated_S_St = PETSC_FALSE; 2755 if (nmin) { 2756 allocated_S_St = PETSC_TRUE; 2757 } 2758 2759 /* allocate lapack workspace */ 2760 cum = cum2 = 0; 2761 maxneigs = 0; 2762 for (i=0;i<sub_schurs->n_subs;i++) { 2763 PetscInt n,subset_size; 2764 2765 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2766 n = PetscMin(subset_size,nmax); 2767 cum += subset_size; 2768 cum2 += subset_size*n; 2769 maxneigs = PetscMax(maxneigs,n); 2770 } 2771 if (mss) { 2772 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2773 PetscBLASInt B_itype = 1; 2774 PetscBLASInt B_N = mss; 2775 PetscReal zero = 0.0; 2776 PetscReal eps = 0.0; /* dlamch? */ 2777 2778 B_lwork = -1; 2779 S = NULL; 2780 St = NULL; 2781 eigs = NULL; 2782 eigv = NULL; 2783 B_iwork = NULL; 2784 B_ifail = NULL; 2785 #if defined(PETSC_USE_COMPLEX) 2786 rwork = NULL; 2787 #endif 2788 thresh = 1.0; 2789 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2790 #if defined(PETSC_USE_COMPLEX) 2791 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2792 #else 2793 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2794 #endif 2795 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2796 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2797 } else { 2798 /* TODO */ 2799 } 2800 } else { 2801 lwork = 0; 2802 } 2803 2804 nv = 0; 2805 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2806 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2807 } 2808 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2809 if (allocated_S_St) { 2810 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2811 } 2812 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2813 #if defined(PETSC_USE_COMPLEX) 2814 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2815 #endif 2816 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2817 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2818 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2819 nv+cum,&pcbddc->adaptive_constraints_idxs, 2820 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2821 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2822 2823 maxneigs = 0; 2824 cum = cumarray = 0; 2825 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2826 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2827 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2828 const PetscInt *idxs; 2829 2830 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2831 for (cum=0;cum<nv;cum++) { 2832 pcbddc->adaptive_constraints_n[cum] = 1; 2833 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2834 pcbddc->adaptive_constraints_data[cum] = 1.0; 2835 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2836 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2837 } 2838 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2839 } 2840 2841 if (mss) { /* multilevel */ 2842 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2843 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2844 } 2845 2846 thresh = pcbddc->adaptive_threshold; 2847 for (i=0;i<sub_schurs->n_subs;i++) { 2848 const PetscInt *idxs; 2849 PetscReal upper,lower; 2850 PetscInt j,subset_size,eigs_start = 0; 2851 PetscBLASInt B_N; 2852 PetscBool same_data = PETSC_FALSE; 2853 2854 if (pcbddc->use_deluxe_scaling) { 2855 upper = PETSC_MAX_REAL; 2856 lower = thresh; 2857 } else { 2858 upper = 1./thresh; 2859 lower = 0.; 2860 } 2861 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2862 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2863 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2864 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2865 if (sub_schurs->is_hermitian) { 2866 PetscInt j,k; 2867 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2868 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2869 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2870 } 2871 for (j=0;j<subset_size;j++) { 2872 for (k=j;k<subset_size;k++) { 2873 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2874 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2875 } 2876 } 2877 } else { 2878 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2879 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2880 } 2881 } else { 2882 S = Sarray + cumarray; 2883 St = Starray + cumarray; 2884 } 2885 /* see if we can save some work */ 2886 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2887 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2888 } 2889 2890 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2891 B_neigs = 0; 2892 } else { 2893 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2894 PetscBLASInt B_itype = 1; 2895 PetscBLASInt B_IL, B_IU; 2896 PetscReal eps = -1.0; /* dlamch? */ 2897 PetscInt nmin_s; 2898 PetscBool compute_range = PETSC_FALSE; 2899 2900 if (pcbddc->dbg_flag) { 2901 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]]); 2902 } 2903 2904 compute_range = PETSC_FALSE; 2905 if (thresh > 1.+PETSC_SMALL && !same_data) { 2906 compute_range = PETSC_TRUE; 2907 } 2908 2909 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2910 if (compute_range) { 2911 2912 /* ask for eigenvalues larger than thresh */ 2913 #if defined(PETSC_USE_COMPLEX) 2914 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2915 #else 2916 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2917 #endif 2918 } else if (!same_data) { 2919 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2920 B_IL = 1; 2921 #if defined(PETSC_USE_COMPLEX) 2922 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2923 #else 2924 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2925 #endif 2926 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 2927 PetscInt k; 2928 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2929 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2930 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2931 nmin = nmax; 2932 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2933 for (k=0;k<nmax;k++) { 2934 eigs[k] = 1./PETSC_SMALL; 2935 eigv[k*(subset_size+1)] = 1.0; 2936 } 2937 } 2938 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2939 if (B_ierr) { 2940 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2941 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2942 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2943 } 2944 2945 if (B_neigs > nmax) { 2946 if (pcbddc->dbg_flag) { 2947 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 2948 } 2949 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 2950 B_neigs = nmax; 2951 } 2952 2953 nmin_s = PetscMin(nmin,B_N); 2954 if (B_neigs < nmin_s) { 2955 PetscBLASInt B_neigs2; 2956 2957 if (pcbddc->use_deluxe_scaling) { 2958 B_IL = B_N - nmin_s + 1; 2959 B_IU = B_N - B_neigs; 2960 } else { 2961 B_IL = B_neigs + 1; 2962 B_IU = nmin_s; 2963 } 2964 if (pcbddc->dbg_flag) { 2965 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 2966 } 2967 if (sub_schurs->is_hermitian) { 2968 PetscInt j,k; 2969 for (j=0;j<subset_size;j++) { 2970 for (k=j;k<subset_size;k++) { 2971 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2972 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2973 } 2974 } 2975 } else { 2976 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2977 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2978 } 2979 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2980 #if defined(PETSC_USE_COMPLEX) 2981 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2982 #else 2983 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2984 #endif 2985 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2986 B_neigs += B_neigs2; 2987 } 2988 if (B_ierr) { 2989 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2990 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2991 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2992 } 2993 if (pcbddc->dbg_flag) { 2994 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 2995 for (j=0;j<B_neigs;j++) { 2996 if (eigs[j] == 0.0) { 2997 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 2998 } else { 2999 if (pcbddc->use_deluxe_scaling) { 3000 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3001 } else { 3002 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3003 } 3004 } 3005 } 3006 } 3007 } else { 3008 /* TODO */ 3009 } 3010 } 3011 /* change the basis back to the original one */ 3012 if (sub_schurs->change) { 3013 Mat change,phi,phit; 3014 3015 if (pcbddc->dbg_flag > 1) { 3016 PetscInt ii; 3017 for (ii=0;ii<B_neigs;ii++) { 3018 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3019 for (j=0;j<B_N;j++) { 3020 #if defined(PETSC_USE_COMPLEX) 3021 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3022 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3023 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3024 #else 3025 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3026 #endif 3027 } 3028 } 3029 } 3030 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3031 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3032 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3033 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3034 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3035 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3036 } 3037 maxneigs = PetscMax(B_neigs,maxneigs); 3038 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3039 if (B_neigs) { 3040 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3041 3042 if (pcbddc->dbg_flag > 1) { 3043 PetscInt ii; 3044 for (ii=0;ii<B_neigs;ii++) { 3045 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3046 for (j=0;j<B_N;j++) { 3047 #if defined(PETSC_USE_COMPLEX) 3048 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3049 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3050 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3051 #else 3052 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3053 #endif 3054 } 3055 } 3056 } 3057 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3058 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3059 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3060 cum++; 3061 } 3062 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3063 /* shift for next computation */ 3064 cumarray += subset_size*subset_size; 3065 } 3066 if (pcbddc->dbg_flag) { 3067 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3068 } 3069 3070 if (mss) { 3071 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3072 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3073 /* destroy matrices (junk) */ 3074 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3075 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3076 } 3077 if (allocated_S_St) { 3078 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3079 } 3080 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3081 #if defined(PETSC_USE_COMPLEX) 3082 ierr = PetscFree(rwork);CHKERRQ(ierr); 3083 #endif 3084 if (pcbddc->dbg_flag) { 3085 PetscInt maxneigs_r; 3086 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3087 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3088 } 3089 PetscFunctionReturn(0); 3090 } 3091 3092 #undef __FUNCT__ 3093 #define __FUNCT__ "PCBDDCSetUpSolvers" 3094 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3095 { 3096 PetscScalar *coarse_submat_vals; 3097 PetscErrorCode ierr; 3098 3099 PetscFunctionBegin; 3100 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3101 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3102 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3103 3104 /* Setup local neumann solver ksp_R */ 3105 /* PCBDDCSetUpLocalScatters should be called first! */ 3106 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3107 3108 /* 3109 Setup local correction and local part of coarse basis. 3110 Gives back the dense local part of the coarse matrix in column major ordering 3111 */ 3112 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3113 3114 /* Compute total number of coarse nodes and setup coarse solver */ 3115 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3116 3117 /* free */ 3118 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3119 PetscFunctionReturn(0); 3120 } 3121 3122 #undef __FUNCT__ 3123 #define __FUNCT__ "PCBDDCResetCustomization" 3124 PetscErrorCode PCBDDCResetCustomization(PC pc) 3125 { 3126 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3127 PetscErrorCode ierr; 3128 3129 PetscFunctionBegin; 3130 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3131 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3132 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3133 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3134 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3135 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3136 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3137 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3138 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3139 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3140 PetscFunctionReturn(0); 3141 } 3142 3143 #undef __FUNCT__ 3144 #define __FUNCT__ "PCBDDCResetTopography" 3145 PetscErrorCode PCBDDCResetTopography(PC pc) 3146 { 3147 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3148 PetscInt i; 3149 PetscErrorCode ierr; 3150 3151 PetscFunctionBegin; 3152 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3153 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3154 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3155 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3156 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3157 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3158 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3159 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3160 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3161 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3162 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3163 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3164 for (i=0;i<pcbddc->n_local_subs;i++) { 3165 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3166 } 3167 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3168 if (pcbddc->sub_schurs) { 3169 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 3170 } 3171 pcbddc->graphanalyzed = PETSC_FALSE; 3172 pcbddc->recompute_topography = PETSC_TRUE; 3173 PetscFunctionReturn(0); 3174 } 3175 3176 #undef __FUNCT__ 3177 #define __FUNCT__ "PCBDDCResetSolvers" 3178 PetscErrorCode PCBDDCResetSolvers(PC pc) 3179 { 3180 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3181 PetscErrorCode ierr; 3182 3183 PetscFunctionBegin; 3184 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3185 if (pcbddc->coarse_phi_B) { 3186 PetscScalar *array; 3187 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3188 ierr = PetscFree(array);CHKERRQ(ierr); 3189 } 3190 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3191 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3192 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3193 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3194 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3195 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3196 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3197 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3198 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3199 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3200 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3201 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3202 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3203 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3204 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 3205 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 3206 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 3207 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3208 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3209 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3210 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3211 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3212 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3213 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3214 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3215 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3216 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3217 if (pcbddc->benign_zerodiag_subs) { 3218 PetscInt i; 3219 for (i=0;i<pcbddc->benign_n;i++) { 3220 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3221 } 3222 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3223 } 3224 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3225 PetscFunctionReturn(0); 3226 } 3227 3228 #undef __FUNCT__ 3229 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 3230 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3231 { 3232 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3233 PC_IS *pcis = (PC_IS*)pc->data; 3234 VecType impVecType; 3235 PetscInt n_constraints,n_R,old_size; 3236 PetscErrorCode ierr; 3237 3238 PetscFunctionBegin; 3239 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 3240 /* get sizes */ 3241 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3242 n_R = pcis->n - pcbddc->n_vertices; 3243 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3244 /* local work vectors (try to avoid unneeded work)*/ 3245 /* R nodes */ 3246 old_size = -1; 3247 if (pcbddc->vec1_R) { 3248 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3249 } 3250 if (n_R != old_size) { 3251 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3252 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3253 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3254 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3255 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3256 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3257 } 3258 /* local primal dofs */ 3259 old_size = -1; 3260 if (pcbddc->vec1_P) { 3261 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3262 } 3263 if (pcbddc->local_primal_size != old_size) { 3264 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3265 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3266 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3267 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3268 } 3269 /* local explicit constraints */ 3270 old_size = -1; 3271 if (pcbddc->vec1_C) { 3272 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3273 } 3274 if (n_constraints && n_constraints != old_size) { 3275 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3276 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3277 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3278 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3279 } 3280 PetscFunctionReturn(0); 3281 } 3282 3283 #undef __FUNCT__ 3284 #define __FUNCT__ "PCBDDCSetUpCorrection" 3285 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3286 { 3287 PetscErrorCode ierr; 3288 /* pointers to pcis and pcbddc */ 3289 PC_IS* pcis = (PC_IS*)pc->data; 3290 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3291 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3292 /* submatrices of local problem */ 3293 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3294 /* submatrices of local coarse problem */ 3295 Mat S_VV,S_CV,S_VC,S_CC; 3296 /* working matrices */ 3297 Mat C_CR; 3298 /* additional working stuff */ 3299 PC pc_R; 3300 Mat F; 3301 Vec dummy_vec; 3302 PetscBool isLU,isCHOL,isILU,need_benign_correction; 3303 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3304 PetscScalar *work; 3305 PetscInt *idx_V_B; 3306 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3307 PetscInt i,n_R,n_D,n_B; 3308 3309 /* some shortcuts to scalars */ 3310 PetscScalar one=1.0,m_one=-1.0; 3311 3312 PetscFunctionBegin; 3313 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3314 3315 /* Set Non-overlapping dimensions */ 3316 n_vertices = pcbddc->n_vertices; 3317 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3318 n_B = pcis->n_B; 3319 n_D = pcis->n - n_B; 3320 n_R = pcis->n - n_vertices; 3321 3322 /* vertices in boundary numbering */ 3323 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3324 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3325 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3326 3327 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3328 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3329 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3330 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3331 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3332 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3333 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3334 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3335 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3336 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3337 3338 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3339 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3340 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3341 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3342 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3343 lda_rhs = n_R; 3344 need_benign_correction = PETSC_FALSE; 3345 if (isLU || isILU || isCHOL) { 3346 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3347 } else if (sub_schurs && sub_schurs->reuse_solver) { 3348 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3349 MatFactorType type; 3350 3351 F = reuse_solver->F; 3352 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3353 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3354 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3355 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3356 } else { 3357 F = NULL; 3358 } 3359 3360 /* allocate workspace */ 3361 n = 0; 3362 if (n_constraints) { 3363 n += lda_rhs*n_constraints; 3364 } 3365 if (n_vertices) { 3366 n = PetscMax(2*lda_rhs*n_vertices,n); 3367 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3368 } 3369 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3370 3371 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3372 dummy_vec = NULL; 3373 if (need_benign_correction && lda_rhs != n_R && F) { 3374 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3375 } 3376 3377 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3378 if (n_constraints) { 3379 Mat M1,M2,M3,C_B; 3380 IS is_aux; 3381 PetscScalar *array,*array2; 3382 3383 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3384 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3385 3386 /* Extract constraints on R nodes: C_{CR} */ 3387 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3388 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3389 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3390 3391 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3392 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3393 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3394 for (i=0;i<n_constraints;i++) { 3395 const PetscScalar *row_cmat_values; 3396 const PetscInt *row_cmat_indices; 3397 PetscInt size_of_constraint,j; 3398 3399 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3400 for (j=0;j<size_of_constraint;j++) { 3401 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3402 } 3403 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3404 } 3405 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3406 if (F) { 3407 Mat B; 3408 3409 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3410 if (need_benign_correction) { 3411 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3412 3413 /* rhs is already zero on interior dofs, no need to change the rhs */ 3414 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3415 } 3416 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 3417 if (need_benign_correction) { 3418 PetscScalar *marr; 3419 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3420 3421 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3422 if (lda_rhs != n_R) { 3423 for (i=0;i<n_constraints;i++) { 3424 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3425 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3426 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3427 } 3428 } else { 3429 for (i=0;i<n_constraints;i++) { 3430 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3431 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3432 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3433 } 3434 } 3435 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3436 } 3437 ierr = MatDestroy(&B);CHKERRQ(ierr); 3438 } else { 3439 PetscScalar *marr; 3440 3441 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3442 for (i=0;i<n_constraints;i++) { 3443 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3444 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3445 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3446 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3447 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3448 } 3449 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3450 } 3451 if (!pcbddc->switch_static) { 3452 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3453 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3454 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3455 for (i=0;i<n_constraints;i++) { 3456 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3457 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3458 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3459 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3460 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3461 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3462 } 3463 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3464 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3465 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3466 } else { 3467 if (lda_rhs != n_R) { 3468 IS dummy; 3469 3470 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3471 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3472 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3473 } else { 3474 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3475 pcbddc->local_auxmat2 = local_auxmat2_R; 3476 } 3477 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3478 } 3479 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3480 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3481 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3482 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3483 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3484 if (isCHOL) { 3485 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3486 } else { 3487 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3488 } 3489 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3490 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3491 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3492 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3493 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3494 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3495 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3496 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3497 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3498 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3499 } 3500 3501 /* Get submatrices from subdomain matrix */ 3502 if (n_vertices) { 3503 IS is_aux; 3504 3505 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3506 IS tis; 3507 3508 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3509 ierr = ISSort(tis);CHKERRQ(ierr); 3510 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3511 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3512 } else { 3513 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3514 } 3515 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3516 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3517 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3518 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3519 } 3520 3521 /* Matrix of coarse basis functions (local) */ 3522 if (pcbddc->coarse_phi_B) { 3523 PetscInt on_B,on_primal,on_D=n_D; 3524 if (pcbddc->coarse_phi_D) { 3525 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3526 } 3527 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3528 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3529 PetscScalar *marray; 3530 3531 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3532 ierr = PetscFree(marray);CHKERRQ(ierr); 3533 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3534 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3535 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3536 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3537 } 3538 } 3539 3540 if (!pcbddc->coarse_phi_B) { 3541 PetscScalar *marray; 3542 3543 n = n_B*pcbddc->local_primal_size; 3544 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3545 n += n_D*pcbddc->local_primal_size; 3546 } 3547 if (!pcbddc->symmetric_primal) { 3548 n *= 2; 3549 } 3550 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 3551 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3552 n = n_B*pcbddc->local_primal_size; 3553 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3554 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3555 n += n_D*pcbddc->local_primal_size; 3556 } 3557 if (!pcbddc->symmetric_primal) { 3558 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3559 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3560 n = n_B*pcbddc->local_primal_size; 3561 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3562 } 3563 } else { 3564 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3565 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3566 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3567 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3568 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3569 } 3570 } 3571 } 3572 3573 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3574 p0_lidx_I = NULL; 3575 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3576 const PetscInt *idxs; 3577 3578 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3579 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3580 for (i=0;i<pcbddc->benign_n;i++) { 3581 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3582 } 3583 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3584 } 3585 3586 /* vertices */ 3587 if (n_vertices) { 3588 3589 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3590 3591 if (n_R) { 3592 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3593 PetscBLASInt B_N,B_one = 1; 3594 PetscScalar *x,*y; 3595 PetscBool isseqaij; 3596 3597 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3598 if (need_benign_correction) { 3599 ISLocalToGlobalMapping RtoN; 3600 IS is_p0; 3601 PetscInt *idxs_p0,n; 3602 3603 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3604 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3605 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3606 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3607 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3608 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3609 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3610 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3611 } 3612 3613 if (lda_rhs == n_R) { 3614 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3615 } else { 3616 PetscScalar *av,*array; 3617 const PetscInt *xadj,*adjncy; 3618 PetscInt n; 3619 PetscBool flg_row; 3620 3621 array = work+lda_rhs*n_vertices; 3622 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3623 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3624 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3625 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3626 for (i=0;i<n;i++) { 3627 PetscInt j; 3628 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3629 } 3630 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3631 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3632 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3633 } 3634 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3635 if (need_benign_correction) { 3636 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3637 PetscScalar *marr; 3638 3639 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3640 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3641 3642 | 0 0 0 | (V) 3643 L = | 0 0 -1 | (P-p0) 3644 | 0 0 -1 | (p0) 3645 3646 */ 3647 for (i=0;i<reuse_solver->benign_n;i++) { 3648 const PetscScalar *vals; 3649 const PetscInt *idxs,*idxs_zero; 3650 PetscInt n,j,nz; 3651 3652 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3653 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3654 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3655 for (j=0;j<n;j++) { 3656 PetscScalar val = vals[j]; 3657 PetscInt k,col = idxs[j]; 3658 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3659 } 3660 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3661 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3662 } 3663 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3664 } 3665 if (F) { 3666 /* need to correct the rhs */ 3667 if (need_benign_correction) { 3668 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3669 PetscScalar *marr; 3670 3671 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3672 if (lda_rhs != n_R) { 3673 for (i=0;i<n_vertices;i++) { 3674 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3675 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3676 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3677 } 3678 } else { 3679 for (i=0;i<n_vertices;i++) { 3680 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3681 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3682 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3683 } 3684 } 3685 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3686 } 3687 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3688 /* need to correct the solution */ 3689 if (need_benign_correction) { 3690 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3691 PetscScalar *marr; 3692 3693 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3694 if (lda_rhs != n_R) { 3695 for (i=0;i<n_vertices;i++) { 3696 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3697 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3698 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3699 } 3700 } else { 3701 for (i=0;i<n_vertices;i++) { 3702 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3703 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3704 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3705 } 3706 } 3707 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3708 } 3709 } else { 3710 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3711 for (i=0;i<n_vertices;i++) { 3712 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3713 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3714 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3715 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3716 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3717 } 3718 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3719 } 3720 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3721 /* S_VV and S_CV */ 3722 if (n_constraints) { 3723 Mat B; 3724 3725 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3726 for (i=0;i<n_vertices;i++) { 3727 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3728 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3729 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3730 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3731 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3732 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3733 } 3734 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3735 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3736 ierr = MatDestroy(&B);CHKERRQ(ierr); 3737 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3738 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3739 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3740 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3741 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3742 ierr = MatDestroy(&B);CHKERRQ(ierr); 3743 } 3744 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3745 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3746 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3747 } 3748 if (lda_rhs != n_R) { 3749 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3750 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3751 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3752 } 3753 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3754 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3755 if (need_benign_correction) { 3756 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3757 PetscScalar *marr,*sums; 3758 3759 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3760 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 3761 for (i=0;i<reuse_solver->benign_n;i++) { 3762 const PetscScalar *vals; 3763 const PetscInt *idxs,*idxs_zero; 3764 PetscInt n,j,nz; 3765 3766 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3767 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3768 for (j=0;j<n_vertices;j++) { 3769 PetscInt k; 3770 sums[j] = 0.; 3771 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3772 } 3773 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3774 for (j=0;j<n;j++) { 3775 PetscScalar val = vals[j]; 3776 PetscInt k; 3777 for (k=0;k<n_vertices;k++) { 3778 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3779 } 3780 } 3781 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3782 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3783 } 3784 ierr = PetscFree(sums);CHKERRQ(ierr); 3785 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 3786 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3787 } 3788 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3789 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3790 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3791 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3792 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3793 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3794 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3795 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3796 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3797 } else { 3798 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3799 } 3800 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3801 3802 /* coarse basis functions */ 3803 for (i=0;i<n_vertices;i++) { 3804 PetscScalar *y; 3805 3806 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3807 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3808 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3809 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3810 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3811 y[n_B*i+idx_V_B[i]] = 1.0; 3812 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3813 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3814 3815 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3816 PetscInt j; 3817 3818 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3819 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3820 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3821 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3822 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3823 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3824 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3825 } 3826 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3827 } 3828 /* if n_R == 0 the object is not destroyed */ 3829 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3830 } 3831 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3832 3833 if (n_constraints) { 3834 Mat B; 3835 3836 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3837 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3838 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3839 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3840 if (n_vertices) { 3841 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3842 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3843 } else { 3844 Mat S_VCt; 3845 3846 if (lda_rhs != n_R) { 3847 ierr = MatDestroy(&B);CHKERRQ(ierr); 3848 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3849 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3850 } 3851 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3852 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3853 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3854 } 3855 } 3856 ierr = MatDestroy(&B);CHKERRQ(ierr); 3857 /* coarse basis functions */ 3858 for (i=0;i<n_constraints;i++) { 3859 PetscScalar *y; 3860 3861 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3862 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3863 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3864 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3865 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3866 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3867 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3868 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3869 PetscInt j; 3870 3871 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3872 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3873 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3874 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3875 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3876 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3877 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3878 } 3879 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3880 } 3881 } 3882 if (n_constraints) { 3883 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3884 } 3885 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3886 3887 /* coarse matrix entries relative to B_0 */ 3888 if (pcbddc->benign_n) { 3889 Mat B0_B,B0_BPHI; 3890 IS is_dummy; 3891 PetscScalar *data; 3892 PetscInt j; 3893 3894 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3895 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3896 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3897 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3898 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3899 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3900 for (j=0;j<pcbddc->benign_n;j++) { 3901 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3902 for (i=0;i<pcbddc->local_primal_size;i++) { 3903 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3904 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3905 } 3906 } 3907 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3908 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3909 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3910 } 3911 3912 /* compute other basis functions for non-symmetric problems */ 3913 if (!pcbddc->symmetric_primal) { 3914 Mat B_V=NULL,B_C=NULL; 3915 PetscScalar *marray; 3916 3917 if (n_constraints) { 3918 Mat S_CCT,C_CRT; 3919 3920 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 3921 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 3922 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 3923 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 3924 if (n_vertices) { 3925 Mat S_VCT; 3926 3927 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 3928 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 3929 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 3930 } 3931 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 3932 } else { 3933 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 3934 } 3935 if (n_vertices && n_R) { 3936 PetscScalar *av,*marray; 3937 const PetscInt *xadj,*adjncy; 3938 PetscInt n; 3939 PetscBool flg_row; 3940 3941 /* B_V = B_V - A_VR^T */ 3942 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3943 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3944 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 3945 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3946 for (i=0;i<n;i++) { 3947 PetscInt j; 3948 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 3949 } 3950 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3951 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3952 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3953 } 3954 3955 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 3956 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3957 for (i=0;i<n_vertices;i++) { 3958 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 3959 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3960 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3961 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3962 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3963 } 3964 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3965 if (B_C) { 3966 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 3967 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 3968 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 3969 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3970 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3971 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3972 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3973 } 3974 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 3975 } 3976 /* coarse basis functions */ 3977 for (i=0;i<pcbddc->local_primal_size;i++) { 3978 PetscScalar *y; 3979 3980 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 3981 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3982 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3983 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3984 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3985 if (i<n_vertices) { 3986 y[n_B*i+idx_V_B[i]] = 1.0; 3987 } 3988 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3989 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3990 3991 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3992 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3993 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3994 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3995 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3996 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3997 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3998 } 3999 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4000 } 4001 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4002 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4003 } 4004 /* free memory */ 4005 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4006 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4007 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4008 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4009 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4010 ierr = PetscFree(work);CHKERRQ(ierr); 4011 if (n_vertices) { 4012 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4013 } 4014 if (n_constraints) { 4015 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4016 } 4017 /* Checking coarse_sub_mat and coarse basis functios */ 4018 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4019 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4020 if (pcbddc->dbg_flag) { 4021 Mat coarse_sub_mat; 4022 Mat AUXMAT,TM1,TM2,TM3,TM4; 4023 Mat coarse_phi_D,coarse_phi_B; 4024 Mat coarse_psi_D,coarse_psi_B; 4025 Mat A_II,A_BB,A_IB,A_BI; 4026 Mat C_B,CPHI; 4027 IS is_dummy; 4028 Vec mones; 4029 MatType checkmattype=MATSEQAIJ; 4030 PetscReal real_value; 4031 4032 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4033 Mat A; 4034 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4035 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4036 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4037 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4038 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4039 ierr = MatDestroy(&A);CHKERRQ(ierr); 4040 } else { 4041 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4042 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4043 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4044 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4045 } 4046 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4047 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4048 if (!pcbddc->symmetric_primal) { 4049 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4050 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4051 } 4052 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4053 4054 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4055 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4056 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4057 if (!pcbddc->symmetric_primal) { 4058 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4059 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4060 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4061 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4062 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4063 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4064 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4065 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4066 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4067 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4068 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4069 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4070 } else { 4071 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4072 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4073 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4074 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4075 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4076 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4077 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4078 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4079 } 4080 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4081 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4082 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4083 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4084 if (pcbddc->benign_n) { 4085 Mat B0_B,B0_BPHI; 4086 PetscScalar *data,*data2; 4087 PetscInt j; 4088 4089 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4090 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4091 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4092 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4093 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4094 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4095 for (j=0;j<pcbddc->benign_n;j++) { 4096 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4097 for (i=0;i<pcbddc->local_primal_size;i++) { 4098 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4099 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4100 } 4101 } 4102 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4103 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4104 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4105 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4106 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4107 } 4108 #if 0 4109 { 4110 PetscViewer viewer; 4111 char filename[256]; 4112 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4113 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4114 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4115 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4116 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4117 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4118 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4119 if (save_change) { 4120 Mat phi_B; 4121 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 4122 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 4123 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 4124 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 4125 } else { 4126 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4127 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4128 } 4129 if (pcbddc->coarse_phi_D) { 4130 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4131 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4132 } 4133 if (pcbddc->coarse_psi_B) { 4134 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4135 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4136 } 4137 if (pcbddc->coarse_psi_D) { 4138 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4139 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4140 } 4141 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4142 } 4143 #endif 4144 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4145 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4146 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4147 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4148 4149 /* check constraints */ 4150 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4151 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4152 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4153 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4154 } else { 4155 PetscScalar *data; 4156 Mat tmat; 4157 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4158 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4159 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4160 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4161 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4162 } 4163 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4164 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4165 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4166 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4167 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4168 if (!pcbddc->symmetric_primal) { 4169 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4170 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4171 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4172 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4173 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4174 } 4175 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4176 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4177 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4178 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4179 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4180 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4181 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4182 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4183 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4184 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4185 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4186 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4187 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4188 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4189 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4190 if (!pcbddc->symmetric_primal) { 4191 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4192 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4193 } 4194 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4195 } 4196 /* get back data */ 4197 *coarse_submat_vals_n = coarse_submat_vals; 4198 PetscFunctionReturn(0); 4199 } 4200 4201 #undef __FUNCT__ 4202 #define __FUNCT__ "MatGetSubMatrixUnsorted" 4203 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4204 { 4205 Mat *work_mat; 4206 IS isrow_s,iscol_s; 4207 PetscBool rsorted,csorted; 4208 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4209 PetscErrorCode ierr; 4210 4211 PetscFunctionBegin; 4212 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4213 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4214 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4215 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4216 4217 if (!rsorted) { 4218 const PetscInt *idxs; 4219 PetscInt *idxs_sorted,i; 4220 4221 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4222 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4223 for (i=0;i<rsize;i++) { 4224 idxs_perm_r[i] = i; 4225 } 4226 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4227 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4228 for (i=0;i<rsize;i++) { 4229 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4230 } 4231 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4232 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4233 } else { 4234 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4235 isrow_s = isrow; 4236 } 4237 4238 if (!csorted) { 4239 if (isrow == iscol) { 4240 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4241 iscol_s = isrow_s; 4242 } else { 4243 const PetscInt *idxs; 4244 PetscInt *idxs_sorted,i; 4245 4246 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4247 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4248 for (i=0;i<csize;i++) { 4249 idxs_perm_c[i] = i; 4250 } 4251 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4252 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4253 for (i=0;i<csize;i++) { 4254 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4255 } 4256 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4257 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4258 } 4259 } else { 4260 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4261 iscol_s = iscol; 4262 } 4263 4264 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4265 4266 if (!rsorted || !csorted) { 4267 Mat new_mat; 4268 IS is_perm_r,is_perm_c; 4269 4270 if (!rsorted) { 4271 PetscInt *idxs_r,i; 4272 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4273 for (i=0;i<rsize;i++) { 4274 idxs_r[idxs_perm_r[i]] = i; 4275 } 4276 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4277 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4278 } else { 4279 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4280 } 4281 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4282 4283 if (!csorted) { 4284 if (isrow_s == iscol_s) { 4285 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4286 is_perm_c = is_perm_r; 4287 } else { 4288 PetscInt *idxs_c,i; 4289 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4290 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4291 for (i=0;i<csize;i++) { 4292 idxs_c[idxs_perm_c[i]] = i; 4293 } 4294 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4295 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4296 } 4297 } else { 4298 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4299 } 4300 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4301 4302 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4303 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4304 work_mat[0] = new_mat; 4305 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4306 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4307 } 4308 4309 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4310 *B = work_mat[0]; 4311 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4312 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4313 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4314 PetscFunctionReturn(0); 4315 } 4316 4317 #undef __FUNCT__ 4318 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 4319 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4320 { 4321 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4322 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4323 Mat new_mat; 4324 IS is_local,is_global; 4325 PetscInt local_size; 4326 PetscBool isseqaij; 4327 PetscErrorCode ierr; 4328 4329 PetscFunctionBegin; 4330 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4331 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4332 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4333 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4334 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4335 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4336 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4337 4338 /* check */ 4339 if (pcbddc->dbg_flag) { 4340 Vec x,x_change; 4341 PetscReal error; 4342 4343 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4344 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4345 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4346 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4347 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4348 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4349 if (!pcbddc->change_interior) { 4350 const PetscScalar *x,*y,*v; 4351 PetscReal lerror = 0.; 4352 PetscInt i; 4353 4354 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4355 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4356 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4357 for (i=0;i<local_size;i++) 4358 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4359 lerror = PetscAbsScalar(x[i]-y[i]); 4360 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4361 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4362 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4363 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4364 if (error > PETSC_SMALL) { 4365 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4366 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4367 } else { 4368 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4369 } 4370 } 4371 } 4372 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4373 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4374 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4375 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4376 if (error > PETSC_SMALL) { 4377 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4378 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4379 } else { 4380 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4381 } 4382 } 4383 ierr = VecDestroy(&x);CHKERRQ(ierr); 4384 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4385 } 4386 4387 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4388 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4389 if (isseqaij) { 4390 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4391 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4392 } else { 4393 Mat work_mat; 4394 4395 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4396 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4397 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4398 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4399 } 4400 if (matis->A->symmetric_set) { 4401 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4402 #if !defined(PETSC_USE_COMPLEX) 4403 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4404 #endif 4405 } 4406 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4407 PetscFunctionReturn(0); 4408 } 4409 4410 #undef __FUNCT__ 4411 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 4412 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4413 { 4414 PC_IS* pcis = (PC_IS*)(pc->data); 4415 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4416 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4417 PetscInt *idx_R_local=NULL; 4418 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4419 PetscInt vbs,bs; 4420 PetscBT bitmask=NULL; 4421 PetscErrorCode ierr; 4422 4423 PetscFunctionBegin; 4424 /* 4425 No need to setup local scatters if 4426 - primal space is unchanged 4427 AND 4428 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4429 AND 4430 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4431 */ 4432 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4433 PetscFunctionReturn(0); 4434 } 4435 /* destroy old objects */ 4436 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4437 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4438 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4439 /* Set Non-overlapping dimensions */ 4440 n_B = pcis->n_B; 4441 n_D = pcis->n - n_B; 4442 n_vertices = pcbddc->n_vertices; 4443 4444 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4445 4446 /* create auxiliary bitmask and allocate workspace */ 4447 if (!sub_schurs || !sub_schurs->reuse_solver) { 4448 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4449 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4450 for (i=0;i<n_vertices;i++) { 4451 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4452 } 4453 4454 for (i=0, n_R=0; i<pcis->n; i++) { 4455 if (!PetscBTLookup(bitmask,i)) { 4456 idx_R_local[n_R++] = i; 4457 } 4458 } 4459 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4460 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4461 4462 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4463 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4464 } 4465 4466 /* Block code */ 4467 vbs = 1; 4468 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4469 if (bs>1 && !(n_vertices%bs)) { 4470 PetscBool is_blocked = PETSC_TRUE; 4471 PetscInt *vary; 4472 if (!sub_schurs || !sub_schurs->reuse_solver) { 4473 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4474 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4475 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4476 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4477 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4478 for (i=0; i<pcis->n/bs; i++) { 4479 if (vary[i]!=0 && vary[i]!=bs) { 4480 is_blocked = PETSC_FALSE; 4481 break; 4482 } 4483 } 4484 ierr = PetscFree(vary);CHKERRQ(ierr); 4485 } else { 4486 /* Verify directly the R set */ 4487 for (i=0; i<n_R/bs; i++) { 4488 PetscInt j,node=idx_R_local[bs*i]; 4489 for (j=1; j<bs; j++) { 4490 if (node != idx_R_local[bs*i+j]-j) { 4491 is_blocked = PETSC_FALSE; 4492 break; 4493 } 4494 } 4495 } 4496 } 4497 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4498 vbs = bs; 4499 for (i=0;i<n_R/vbs;i++) { 4500 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4501 } 4502 } 4503 } 4504 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4505 if (sub_schurs && sub_schurs->reuse_solver) { 4506 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4507 4508 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4509 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4510 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4511 reuse_solver->is_R = pcbddc->is_R_local; 4512 } else { 4513 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4514 } 4515 4516 /* print some info if requested */ 4517 if (pcbddc->dbg_flag) { 4518 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4519 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4520 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4521 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4522 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4523 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4524 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4525 } 4526 4527 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4528 if (!sub_schurs || !sub_schurs->reuse_solver) { 4529 IS is_aux1,is_aux2; 4530 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4531 4532 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4533 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4534 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4535 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4536 for (i=0; i<n_D; i++) { 4537 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4538 } 4539 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4540 for (i=0, j=0; i<n_R; i++) { 4541 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4542 aux_array1[j++] = i; 4543 } 4544 } 4545 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4546 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4547 for (i=0, j=0; i<n_B; i++) { 4548 if (!PetscBTLookup(bitmask,is_indices[i])) { 4549 aux_array2[j++] = i; 4550 } 4551 } 4552 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4553 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4554 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4555 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4556 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4557 4558 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4559 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4560 for (i=0, j=0; i<n_R; i++) { 4561 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4562 aux_array1[j++] = i; 4563 } 4564 } 4565 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4566 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4567 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4568 } 4569 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4570 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4571 } else { 4572 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4573 IS tis; 4574 PetscInt schur_size; 4575 4576 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4577 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4578 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4579 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4580 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4581 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4582 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4583 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4584 } 4585 } 4586 PetscFunctionReturn(0); 4587 } 4588 4589 4590 #undef __FUNCT__ 4591 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 4592 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4593 { 4594 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4595 PC_IS *pcis = (PC_IS*)pc->data; 4596 PC pc_temp; 4597 Mat A_RR; 4598 MatReuse reuse; 4599 PetscScalar m_one = -1.0; 4600 PetscReal value; 4601 PetscInt n_D,n_R; 4602 PetscBool check_corr[2],issbaij; 4603 PetscErrorCode ierr; 4604 /* prefixes stuff */ 4605 char dir_prefix[256],neu_prefix[256],str_level[16]; 4606 size_t len; 4607 4608 PetscFunctionBegin; 4609 4610 /* compute prefixes */ 4611 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4612 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4613 if (!pcbddc->current_level) { 4614 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4615 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4616 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4617 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4618 } else { 4619 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4620 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4621 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4622 len -= 15; /* remove "pc_bddc_coarse_" */ 4623 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4624 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4625 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4626 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4627 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4628 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4629 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4630 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4631 } 4632 4633 /* DIRICHLET PROBLEM */ 4634 if (dirichlet) { 4635 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4636 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4637 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4638 if (pcbddc->dbg_flag) { 4639 Mat A_IIn; 4640 4641 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4642 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4643 pcis->A_II = A_IIn; 4644 } 4645 } 4646 if (pcbddc->local_mat->symmetric_set) { 4647 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4648 } 4649 /* Matrix for Dirichlet problem is pcis->A_II */ 4650 n_D = pcis->n - pcis->n_B; 4651 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4652 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4653 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4654 /* default */ 4655 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4656 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4657 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4658 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4659 if (issbaij) { 4660 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4661 } else { 4662 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4663 } 4664 /* Allow user's customization */ 4665 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4666 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4667 } 4668 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4669 if (sub_schurs && sub_schurs->reuse_solver) { 4670 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4671 4672 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4673 } 4674 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4675 if (!n_D) { 4676 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4677 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4678 } 4679 /* Set Up KSP for Dirichlet problem of BDDC */ 4680 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4681 /* set ksp_D into pcis data */ 4682 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4683 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4684 pcis->ksp_D = pcbddc->ksp_D; 4685 } 4686 4687 /* NEUMANN PROBLEM */ 4688 A_RR = 0; 4689 if (neumann) { 4690 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4691 PetscInt ibs,mbs; 4692 PetscBool issbaij; 4693 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4694 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4695 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4696 if (pcbddc->ksp_R) { /* already created ksp */ 4697 PetscInt nn_R; 4698 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4699 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4700 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4701 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4702 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4703 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4704 reuse = MAT_INITIAL_MATRIX; 4705 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4706 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4707 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4708 reuse = MAT_INITIAL_MATRIX; 4709 } else { /* safe to reuse the matrix */ 4710 reuse = MAT_REUSE_MATRIX; 4711 } 4712 } 4713 /* last check */ 4714 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4715 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4716 reuse = MAT_INITIAL_MATRIX; 4717 } 4718 } else { /* first time, so we need to create the matrix */ 4719 reuse = MAT_INITIAL_MATRIX; 4720 } 4721 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4722 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4723 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4724 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4725 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4726 if (matis->A == pcbddc->local_mat) { 4727 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4728 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4729 } else { 4730 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4731 } 4732 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4733 if (matis->A == pcbddc->local_mat) { 4734 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4735 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4736 } else { 4737 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4738 } 4739 } 4740 /* extract A_RR */ 4741 if (sub_schurs && sub_schurs->reuse_solver) { 4742 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4743 4744 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4745 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4746 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4747 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4748 } else { 4749 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4750 } 4751 } else { 4752 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4753 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4754 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4755 } 4756 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4757 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4758 } 4759 if (pcbddc->local_mat->symmetric_set) { 4760 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4761 } 4762 if (!pcbddc->ksp_R) { /* create object if not present */ 4763 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4764 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4765 /* default */ 4766 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4767 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4768 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4769 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4770 if (issbaij) { 4771 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4772 } else { 4773 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4774 } 4775 /* Allow user's customization */ 4776 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4777 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4778 } 4779 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4780 if (!n_R) { 4781 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4782 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4783 } 4784 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4785 /* Reuse solver if it is present */ 4786 if (sub_schurs && sub_schurs->reuse_solver) { 4787 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4788 4789 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4790 } 4791 /* Set Up KSP for Neumann problem of BDDC */ 4792 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4793 } 4794 4795 if (pcbddc->dbg_flag) { 4796 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4797 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4798 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4799 } 4800 4801 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4802 check_corr[0] = check_corr[1] = PETSC_FALSE; 4803 if (pcbddc->NullSpace_corr[0]) { 4804 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4805 } 4806 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4807 check_corr[0] = PETSC_TRUE; 4808 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4809 } 4810 if (neumann && pcbddc->NullSpace_corr[2]) { 4811 check_corr[1] = PETSC_TRUE; 4812 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4813 } 4814 4815 /* check Dirichlet and Neumann solvers */ 4816 if (pcbddc->dbg_flag) { 4817 if (dirichlet) { /* Dirichlet */ 4818 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4819 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4820 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4821 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4822 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4823 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4824 if (check_corr[0]) { 4825 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4826 } 4827 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4828 } 4829 if (neumann) { /* Neumann */ 4830 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4831 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4832 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4833 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4834 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4835 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4836 if (check_corr[1]) { 4837 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4838 } 4839 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4840 } 4841 } 4842 /* free Neumann problem's matrix */ 4843 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4844 PetscFunctionReturn(0); 4845 } 4846 4847 #undef __FUNCT__ 4848 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 4849 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4850 { 4851 PetscErrorCode ierr; 4852 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4853 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4854 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4855 4856 PetscFunctionBegin; 4857 if (!reuse_solver) { 4858 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4859 } 4860 if (!pcbddc->switch_static) { 4861 if (applytranspose && pcbddc->local_auxmat1) { 4862 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4863 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4864 } 4865 if (!reuse_solver) { 4866 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4867 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4868 } else { 4869 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4870 4871 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4872 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4873 } 4874 } else { 4875 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4876 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4877 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4878 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4879 if (applytranspose && pcbddc->local_auxmat1) { 4880 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4881 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4882 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4883 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4884 } 4885 } 4886 if (!reuse_solver || pcbddc->switch_static) { 4887 if (applytranspose) { 4888 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4889 } else { 4890 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4891 } 4892 } else { 4893 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4894 4895 if (applytranspose) { 4896 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4897 } else { 4898 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4899 } 4900 } 4901 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4902 if (!pcbddc->switch_static) { 4903 if (!reuse_solver) { 4904 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4905 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4906 } else { 4907 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4908 4909 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4910 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4911 } 4912 if (!applytranspose && pcbddc->local_auxmat1) { 4913 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4914 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4915 } 4916 } else { 4917 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4918 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4919 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4920 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4921 if (!applytranspose && pcbddc->local_auxmat1) { 4922 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4923 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4924 } 4925 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4926 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4927 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4928 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4929 } 4930 PetscFunctionReturn(0); 4931 } 4932 4933 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 4934 #undef __FUNCT__ 4935 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 4936 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 4937 { 4938 PetscErrorCode ierr; 4939 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4940 PC_IS* pcis = (PC_IS*) (pc->data); 4941 const PetscScalar zero = 0.0; 4942 4943 PetscFunctionBegin; 4944 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 4945 if (!pcbddc->benign_apply_coarse_only) { 4946 if (applytranspose) { 4947 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4948 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4949 } else { 4950 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4951 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4952 } 4953 } else { 4954 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 4955 } 4956 4957 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 4958 if (pcbddc->benign_n) { 4959 PetscScalar *array; 4960 PetscInt j; 4961 4962 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4963 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 4964 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4965 } 4966 4967 /* start communications from local primal nodes to rhs of coarse solver */ 4968 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 4969 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4970 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4971 4972 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 4973 if (pcbddc->coarse_ksp) { 4974 Mat coarse_mat; 4975 Vec rhs,sol; 4976 MatNullSpace nullsp; 4977 PetscBool isbddc = PETSC_FALSE; 4978 4979 if (pcbddc->benign_have_null) { 4980 PC coarse_pc; 4981 4982 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4983 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4984 /* we need to propagate to coarser levels the need for a possible benign correction */ 4985 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 4986 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4987 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 4988 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 4989 } 4990 } 4991 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 4992 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 4993 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4994 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 4995 if (nullsp) { 4996 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 4997 } 4998 if (applytranspose) { 4999 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5000 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5001 } else { 5002 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5003 PC coarse_pc; 5004 5005 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5006 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5007 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5008 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5009 } else { 5010 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5011 } 5012 } 5013 /* we don't need the benign correction at coarser levels anymore */ 5014 if (pcbddc->benign_have_null && isbddc) { 5015 PC coarse_pc; 5016 PC_BDDC* coarsepcbddc; 5017 5018 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5019 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5020 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5021 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5022 } 5023 if (nullsp) { 5024 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5025 } 5026 } 5027 5028 /* Local solution on R nodes */ 5029 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5030 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5031 } 5032 /* communications from coarse sol to local primal nodes */ 5033 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5034 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5035 5036 /* Sum contributions from the two levels */ 5037 if (!pcbddc->benign_apply_coarse_only) { 5038 if (applytranspose) { 5039 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5040 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5041 } else { 5042 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5043 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5044 } 5045 /* store p0 */ 5046 if (pcbddc->benign_n) { 5047 PetscScalar *array; 5048 PetscInt j; 5049 5050 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5051 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5052 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5053 } 5054 } else { /* expand the coarse solution */ 5055 if (applytranspose) { 5056 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5057 } else { 5058 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5059 } 5060 } 5061 PetscFunctionReturn(0); 5062 } 5063 5064 #undef __FUNCT__ 5065 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 5066 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5067 { 5068 PetscErrorCode ierr; 5069 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5070 PetscScalar *array; 5071 Vec from,to; 5072 5073 PetscFunctionBegin; 5074 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5075 from = pcbddc->coarse_vec; 5076 to = pcbddc->vec1_P; 5077 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5078 Vec tvec; 5079 5080 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5081 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5082 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5083 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5084 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5085 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5086 } 5087 } else { /* from local to global -> put data in coarse right hand side */ 5088 from = pcbddc->vec1_P; 5089 to = pcbddc->coarse_vec; 5090 } 5091 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5092 PetscFunctionReturn(0); 5093 } 5094 5095 #undef __FUNCT__ 5096 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 5097 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5098 { 5099 PetscErrorCode ierr; 5100 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5101 PetscScalar *array; 5102 Vec from,to; 5103 5104 PetscFunctionBegin; 5105 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5106 from = pcbddc->coarse_vec; 5107 to = pcbddc->vec1_P; 5108 } else { /* from local to global -> put data in coarse right hand side */ 5109 from = pcbddc->vec1_P; 5110 to = pcbddc->coarse_vec; 5111 } 5112 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5113 if (smode == SCATTER_FORWARD) { 5114 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5115 Vec tvec; 5116 5117 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5118 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5119 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5120 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5121 } 5122 } else { 5123 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5124 ierr = VecResetArray(from);CHKERRQ(ierr); 5125 } 5126 } 5127 PetscFunctionReturn(0); 5128 } 5129 5130 /* uncomment for testing purposes */ 5131 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5132 #undef __FUNCT__ 5133 #define __FUNCT__ "PCBDDCConstraintsSetUp" 5134 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5135 { 5136 PetscErrorCode ierr; 5137 PC_IS* pcis = (PC_IS*)(pc->data); 5138 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5139 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5140 /* one and zero */ 5141 PetscScalar one=1.0,zero=0.0; 5142 /* space to store constraints and their local indices */ 5143 PetscScalar *constraints_data; 5144 PetscInt *constraints_idxs,*constraints_idxs_B; 5145 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5146 PetscInt *constraints_n; 5147 /* iterators */ 5148 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5149 /* BLAS integers */ 5150 PetscBLASInt lwork,lierr; 5151 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5152 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5153 /* reuse */ 5154 PetscInt olocal_primal_size,olocal_primal_size_cc; 5155 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5156 /* change of basis */ 5157 PetscBool qr_needed; 5158 PetscBT change_basis,qr_needed_idx; 5159 /* auxiliary stuff */ 5160 PetscInt *nnz,*is_indices; 5161 PetscInt ncc; 5162 /* some quantities */ 5163 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5164 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5165 5166 PetscFunctionBegin; 5167 /* Destroy Mat objects computed previously */ 5168 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5169 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5170 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5171 /* save info on constraints from previous setup (if any) */ 5172 olocal_primal_size = pcbddc->local_primal_size; 5173 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5174 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5175 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5176 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5177 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5178 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5179 5180 if (!pcbddc->adaptive_selection) { 5181 IS ISForVertices,*ISForFaces,*ISForEdges; 5182 MatNullSpace nearnullsp; 5183 const Vec *nearnullvecs; 5184 Vec *localnearnullsp; 5185 PetscScalar *array; 5186 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5187 PetscBool nnsp_has_cnst; 5188 /* LAPACK working arrays for SVD or POD */ 5189 PetscBool skip_lapack,boolforchange; 5190 PetscScalar *work; 5191 PetscReal *singular_vals; 5192 #if defined(PETSC_USE_COMPLEX) 5193 PetscReal *rwork; 5194 #endif 5195 #if defined(PETSC_MISSING_LAPACK_GESVD) 5196 PetscScalar *temp_basis,*correlation_mat; 5197 #else 5198 PetscBLASInt dummy_int=1; 5199 PetscScalar dummy_scalar=1.; 5200 #endif 5201 5202 /* Get index sets for faces, edges and vertices from graph */ 5203 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5204 /* print some info */ 5205 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5206 PetscInt nv; 5207 5208 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5209 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5210 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5211 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5212 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5213 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5214 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5215 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5216 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5217 } 5218 5219 /* free unneeded index sets */ 5220 if (!pcbddc->use_vertices) { 5221 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5222 } 5223 if (!pcbddc->use_edges) { 5224 for (i=0;i<n_ISForEdges;i++) { 5225 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5226 } 5227 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5228 n_ISForEdges = 0; 5229 } 5230 if (!pcbddc->use_faces) { 5231 for (i=0;i<n_ISForFaces;i++) { 5232 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5233 } 5234 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5235 n_ISForFaces = 0; 5236 } 5237 5238 /* check if near null space is attached to global mat */ 5239 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5240 if (nearnullsp) { 5241 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5242 /* remove any stored info */ 5243 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5244 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5245 /* store information for BDDC solver reuse */ 5246 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5247 pcbddc->onearnullspace = nearnullsp; 5248 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5249 for (i=0;i<nnsp_size;i++) { 5250 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5251 } 5252 } else { /* if near null space is not provided BDDC uses constants by default */ 5253 nnsp_size = 0; 5254 nnsp_has_cnst = PETSC_TRUE; 5255 } 5256 /* get max number of constraints on a single cc */ 5257 max_constraints = nnsp_size; 5258 if (nnsp_has_cnst) max_constraints++; 5259 5260 /* 5261 Evaluate maximum storage size needed by the procedure 5262 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5263 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5264 There can be multiple constraints per connected component 5265 */ 5266 n_vertices = 0; 5267 if (ISForVertices) { 5268 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5269 } 5270 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5271 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5272 5273 total_counts = n_ISForFaces+n_ISForEdges; 5274 total_counts *= max_constraints; 5275 total_counts += n_vertices; 5276 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5277 5278 total_counts = 0; 5279 max_size_of_constraint = 0; 5280 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5281 IS used_is; 5282 if (i<n_ISForEdges) { 5283 used_is = ISForEdges[i]; 5284 } else { 5285 used_is = ISForFaces[i-n_ISForEdges]; 5286 } 5287 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5288 total_counts += j; 5289 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5290 } 5291 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5292 5293 /* get local part of global near null space vectors */ 5294 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5295 for (k=0;k<nnsp_size;k++) { 5296 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5297 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5298 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5299 } 5300 5301 /* whether or not to skip lapack calls */ 5302 skip_lapack = PETSC_TRUE; 5303 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5304 5305 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5306 if (!skip_lapack) { 5307 PetscScalar temp_work; 5308 5309 #if defined(PETSC_MISSING_LAPACK_GESVD) 5310 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5311 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5312 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5313 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5314 #if defined(PETSC_USE_COMPLEX) 5315 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5316 #endif 5317 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5318 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5319 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5320 lwork = -1; 5321 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5322 #if !defined(PETSC_USE_COMPLEX) 5323 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5324 #else 5325 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5326 #endif 5327 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5328 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5329 #else /* on missing GESVD */ 5330 /* SVD */ 5331 PetscInt max_n,min_n; 5332 max_n = max_size_of_constraint; 5333 min_n = max_constraints; 5334 if (max_size_of_constraint < max_constraints) { 5335 min_n = max_size_of_constraint; 5336 max_n = max_constraints; 5337 } 5338 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5339 #if defined(PETSC_USE_COMPLEX) 5340 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5341 #endif 5342 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5343 lwork = -1; 5344 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5345 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5346 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5347 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5348 #if !defined(PETSC_USE_COMPLEX) 5349 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5350 #else 5351 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5352 #endif 5353 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5354 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5355 #endif /* on missing GESVD */ 5356 /* Allocate optimal workspace */ 5357 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5358 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5359 } 5360 /* Now we can loop on constraining sets */ 5361 total_counts = 0; 5362 constraints_idxs_ptr[0] = 0; 5363 constraints_data_ptr[0] = 0; 5364 /* vertices */ 5365 if (n_vertices) { 5366 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5367 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5368 for (i=0;i<n_vertices;i++) { 5369 constraints_n[total_counts] = 1; 5370 constraints_data[total_counts] = 1.0; 5371 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5372 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5373 total_counts++; 5374 } 5375 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5376 n_vertices = total_counts; 5377 } 5378 5379 /* edges and faces */ 5380 total_counts_cc = total_counts; 5381 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5382 IS used_is; 5383 PetscBool idxs_copied = PETSC_FALSE; 5384 5385 if (ncc<n_ISForEdges) { 5386 used_is = ISForEdges[ncc]; 5387 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5388 } else { 5389 used_is = ISForFaces[ncc-n_ISForEdges]; 5390 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5391 } 5392 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5393 5394 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5395 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5396 /* change of basis should not be performed on local periodic nodes */ 5397 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5398 if (nnsp_has_cnst) { 5399 PetscScalar quad_value; 5400 5401 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5402 idxs_copied = PETSC_TRUE; 5403 5404 if (!pcbddc->use_nnsp_true) { 5405 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5406 } else { 5407 quad_value = 1.0; 5408 } 5409 for (j=0;j<size_of_constraint;j++) { 5410 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5411 } 5412 temp_constraints++; 5413 total_counts++; 5414 } 5415 for (k=0;k<nnsp_size;k++) { 5416 PetscReal real_value; 5417 PetscScalar *ptr_to_data; 5418 5419 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5420 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5421 for (j=0;j<size_of_constraint;j++) { 5422 ptr_to_data[j] = array[is_indices[j]]; 5423 } 5424 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5425 /* check if array is null on the connected component */ 5426 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5427 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5428 if (real_value > 0.0) { /* keep indices and values */ 5429 temp_constraints++; 5430 total_counts++; 5431 if (!idxs_copied) { 5432 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5433 idxs_copied = PETSC_TRUE; 5434 } 5435 } 5436 } 5437 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5438 valid_constraints = temp_constraints; 5439 if (!pcbddc->use_nnsp_true && temp_constraints) { 5440 if (temp_constraints == 1) { /* just normalize the constraint */ 5441 PetscScalar norm,*ptr_to_data; 5442 5443 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5444 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5445 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5446 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5447 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5448 } else { /* perform SVD */ 5449 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5450 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5451 5452 #if defined(PETSC_MISSING_LAPACK_GESVD) 5453 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5454 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5455 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5456 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5457 from that computed using LAPACKgesvd 5458 -> This is due to a different computation of eigenvectors in LAPACKheev 5459 -> The quality of the POD-computed basis will be the same */ 5460 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5461 /* Store upper triangular part of correlation matrix */ 5462 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5463 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5464 for (j=0;j<temp_constraints;j++) { 5465 for (k=0;k<j+1;k++) { 5466 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5467 } 5468 } 5469 /* compute eigenvalues and eigenvectors of correlation matrix */ 5470 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5471 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5472 #if !defined(PETSC_USE_COMPLEX) 5473 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5474 #else 5475 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5476 #endif 5477 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5478 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5479 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5480 j = 0; 5481 while (j < temp_constraints && singular_vals[j] < tol) j++; 5482 total_counts = total_counts-j; 5483 valid_constraints = temp_constraints-j; 5484 /* scale and copy POD basis into used quadrature memory */ 5485 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5486 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5487 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5488 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5489 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5490 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5491 if (j<temp_constraints) { 5492 PetscInt ii; 5493 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5494 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5495 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5496 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5497 for (k=0;k<temp_constraints-j;k++) { 5498 for (ii=0;ii<size_of_constraint;ii++) { 5499 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5500 } 5501 } 5502 } 5503 #else /* on missing GESVD */ 5504 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5505 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5506 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5507 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5508 #if !defined(PETSC_USE_COMPLEX) 5509 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5510 #else 5511 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5512 #endif 5513 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5514 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5515 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5516 k = temp_constraints; 5517 if (k > size_of_constraint) k = size_of_constraint; 5518 j = 0; 5519 while (j < k && singular_vals[k-j-1] < tol) j++; 5520 valid_constraints = k-j; 5521 total_counts = total_counts-temp_constraints+valid_constraints; 5522 #endif /* on missing GESVD */ 5523 } 5524 } 5525 /* update pointers information */ 5526 if (valid_constraints) { 5527 constraints_n[total_counts_cc] = valid_constraints; 5528 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5529 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5530 /* set change_of_basis flag */ 5531 if (boolforchange) { 5532 PetscBTSet(change_basis,total_counts_cc); 5533 } 5534 total_counts_cc++; 5535 } 5536 } 5537 /* free workspace */ 5538 if (!skip_lapack) { 5539 ierr = PetscFree(work);CHKERRQ(ierr); 5540 #if defined(PETSC_USE_COMPLEX) 5541 ierr = PetscFree(rwork);CHKERRQ(ierr); 5542 #endif 5543 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5544 #if defined(PETSC_MISSING_LAPACK_GESVD) 5545 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5546 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5547 #endif 5548 } 5549 for (k=0;k<nnsp_size;k++) { 5550 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5551 } 5552 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5553 /* free index sets of faces, edges and vertices */ 5554 for (i=0;i<n_ISForFaces;i++) { 5555 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5556 } 5557 if (n_ISForFaces) { 5558 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5559 } 5560 for (i=0;i<n_ISForEdges;i++) { 5561 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5562 } 5563 if (n_ISForEdges) { 5564 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5565 } 5566 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5567 } else { 5568 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5569 5570 total_counts = 0; 5571 n_vertices = 0; 5572 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5573 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5574 } 5575 max_constraints = 0; 5576 total_counts_cc = 0; 5577 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5578 total_counts += pcbddc->adaptive_constraints_n[i]; 5579 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5580 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5581 } 5582 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5583 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5584 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5585 constraints_data = pcbddc->adaptive_constraints_data; 5586 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5587 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5588 total_counts_cc = 0; 5589 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5590 if (pcbddc->adaptive_constraints_n[i]) { 5591 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5592 } 5593 } 5594 #if 0 5595 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5596 for (i=0;i<total_counts_cc;i++) { 5597 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5598 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5599 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5600 printf(" %d",constraints_idxs[j]); 5601 } 5602 printf("\n"); 5603 printf("number of cc: %d\n",constraints_n[i]); 5604 } 5605 for (i=0;i<n_vertices;i++) { 5606 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5607 } 5608 for (i=0;i<sub_schurs->n_subs;i++) { 5609 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5610 } 5611 #endif 5612 5613 max_size_of_constraint = 0; 5614 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5615 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5616 /* Change of basis */ 5617 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5618 if (pcbddc->use_change_of_basis) { 5619 for (i=0;i<sub_schurs->n_subs;i++) { 5620 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5621 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5622 } 5623 } 5624 } 5625 } 5626 pcbddc->local_primal_size = total_counts; 5627 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5628 5629 /* map constraints_idxs in boundary numbering */ 5630 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5631 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5632 5633 /* Create constraint matrix */ 5634 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5635 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5636 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5637 5638 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5639 /* determine if a QR strategy is needed for change of basis */ 5640 qr_needed = PETSC_FALSE; 5641 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5642 total_primal_vertices=0; 5643 pcbddc->local_primal_size_cc = 0; 5644 for (i=0;i<total_counts_cc;i++) { 5645 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5646 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5647 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5648 pcbddc->local_primal_size_cc += 1; 5649 } else if (PetscBTLookup(change_basis,i)) { 5650 for (k=0;k<constraints_n[i];k++) { 5651 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5652 } 5653 pcbddc->local_primal_size_cc += constraints_n[i]; 5654 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5655 PetscBTSet(qr_needed_idx,i); 5656 qr_needed = PETSC_TRUE; 5657 } 5658 } else { 5659 pcbddc->local_primal_size_cc += 1; 5660 } 5661 } 5662 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5663 pcbddc->n_vertices = total_primal_vertices; 5664 /* permute indices in order to have a sorted set of vertices */ 5665 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5666 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5667 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5668 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5669 5670 /* nonzero structure of constraint matrix */ 5671 /* and get reference dof for local constraints */ 5672 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5673 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5674 5675 j = total_primal_vertices; 5676 total_counts = total_primal_vertices; 5677 cum = total_primal_vertices; 5678 for (i=n_vertices;i<total_counts_cc;i++) { 5679 if (!PetscBTLookup(change_basis,i)) { 5680 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5681 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5682 cum++; 5683 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5684 for (k=0;k<constraints_n[i];k++) { 5685 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5686 nnz[j+k] = size_of_constraint; 5687 } 5688 j += constraints_n[i]; 5689 } 5690 } 5691 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5692 ierr = PetscFree(nnz);CHKERRQ(ierr); 5693 5694 /* set values in constraint matrix */ 5695 for (i=0;i<total_primal_vertices;i++) { 5696 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5697 } 5698 total_counts = total_primal_vertices; 5699 for (i=n_vertices;i<total_counts_cc;i++) { 5700 if (!PetscBTLookup(change_basis,i)) { 5701 PetscInt *cols; 5702 5703 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5704 cols = constraints_idxs+constraints_idxs_ptr[i]; 5705 for (k=0;k<constraints_n[i];k++) { 5706 PetscInt row = total_counts+k; 5707 PetscScalar *vals; 5708 5709 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5710 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5711 } 5712 total_counts += constraints_n[i]; 5713 } 5714 } 5715 /* assembling */ 5716 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5717 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5718 5719 /* 5720 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5721 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5722 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5723 */ 5724 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5725 if (pcbddc->use_change_of_basis) { 5726 /* dual and primal dofs on a single cc */ 5727 PetscInt dual_dofs,primal_dofs; 5728 /* working stuff for GEQRF */ 5729 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5730 PetscBLASInt lqr_work; 5731 /* working stuff for UNGQR */ 5732 PetscScalar *gqr_work,lgqr_work_t; 5733 PetscBLASInt lgqr_work; 5734 /* working stuff for TRTRS */ 5735 PetscScalar *trs_rhs; 5736 PetscBLASInt Blas_NRHS; 5737 /* pointers for values insertion into change of basis matrix */ 5738 PetscInt *start_rows,*start_cols; 5739 PetscScalar *start_vals; 5740 /* working stuff for values insertion */ 5741 PetscBT is_primal; 5742 PetscInt *aux_primal_numbering_B; 5743 /* matrix sizes */ 5744 PetscInt global_size,local_size; 5745 /* temporary change of basis */ 5746 Mat localChangeOfBasisMatrix; 5747 /* extra space for debugging */ 5748 PetscScalar *dbg_work; 5749 5750 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5751 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5752 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5753 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5754 /* nonzeros for local mat */ 5755 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5756 if (!pcbddc->benign_change || pcbddc->fake_change) { 5757 for (i=0;i<pcis->n;i++) nnz[i]=1; 5758 } else { 5759 const PetscInt *ii; 5760 PetscInt n; 5761 PetscBool flg_row; 5762 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5763 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5764 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5765 } 5766 for (i=n_vertices;i<total_counts_cc;i++) { 5767 if (PetscBTLookup(change_basis,i)) { 5768 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5769 if (PetscBTLookup(qr_needed_idx,i)) { 5770 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5771 } else { 5772 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5773 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5774 } 5775 } 5776 } 5777 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5778 ierr = PetscFree(nnz);CHKERRQ(ierr); 5779 /* Set interior change in the matrix */ 5780 if (!pcbddc->benign_change || pcbddc->fake_change) { 5781 for (i=0;i<pcis->n;i++) { 5782 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5783 } 5784 } else { 5785 const PetscInt *ii,*jj; 5786 PetscScalar *aa; 5787 PetscInt n; 5788 PetscBool flg_row; 5789 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5790 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5791 for (i=0;i<n;i++) { 5792 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5793 } 5794 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5795 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5796 } 5797 5798 if (pcbddc->dbg_flag) { 5799 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5800 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5801 } 5802 5803 5804 /* Now we loop on the constraints which need a change of basis */ 5805 /* 5806 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5807 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5808 5809 Basic blocks of change of basis matrix T computed by 5810 5811 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5812 5813 | 1 0 ... 0 s_1/S | 5814 | 0 1 ... 0 s_2/S | 5815 | ... | 5816 | 0 ... 1 s_{n-1}/S | 5817 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5818 5819 with S = \sum_{i=1}^n s_i^2 5820 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5821 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5822 5823 - QR decomposition of constraints otherwise 5824 */ 5825 if (qr_needed) { 5826 /* space to store Q */ 5827 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5828 /* array to store scaling factors for reflectors */ 5829 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5830 /* first we issue queries for optimal work */ 5831 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5832 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5833 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5834 lqr_work = -1; 5835 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5836 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5837 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5838 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5839 lgqr_work = -1; 5840 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5841 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5842 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5843 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5844 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5845 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5846 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5847 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5848 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5849 /* array to store rhs and solution of triangular solver */ 5850 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5851 /* allocating workspace for check */ 5852 if (pcbddc->dbg_flag) { 5853 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5854 } 5855 } 5856 /* array to store whether a node is primal or not */ 5857 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5858 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5859 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5860 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5861 for (i=0;i<total_primal_vertices;i++) { 5862 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5863 } 5864 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5865 5866 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5867 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5868 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5869 if (PetscBTLookup(change_basis,total_counts)) { 5870 /* get constraint info */ 5871 primal_dofs = constraints_n[total_counts]; 5872 dual_dofs = size_of_constraint-primal_dofs; 5873 5874 if (pcbddc->dbg_flag) { 5875 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5876 } 5877 5878 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5879 5880 /* copy quadrature constraints for change of basis check */ 5881 if (pcbddc->dbg_flag) { 5882 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5883 } 5884 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5885 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5886 5887 /* compute QR decomposition of constraints */ 5888 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5889 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5890 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5891 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5892 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5893 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5894 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5895 5896 /* explictly compute R^-T */ 5897 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5898 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5899 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5900 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5901 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5902 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5903 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5904 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5905 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5906 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5907 5908 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5909 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5910 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5911 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5912 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5913 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5914 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 5915 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 5916 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5917 5918 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 5919 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 5920 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 5921 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5922 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5923 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5924 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5925 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5926 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5927 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5928 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 5929 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5930 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5931 5932 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 5933 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 5934 /* insert cols for primal dofs */ 5935 for (j=0;j<primal_dofs;j++) { 5936 start_vals = &qr_basis[j*size_of_constraint]; 5937 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5938 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5939 } 5940 /* insert cols for dual dofs */ 5941 for (j=0,k=0;j<dual_dofs;k++) { 5942 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 5943 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 5944 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5945 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5946 j++; 5947 } 5948 } 5949 5950 /* check change of basis */ 5951 if (pcbddc->dbg_flag) { 5952 PetscInt ii,jj; 5953 PetscBool valid_qr=PETSC_TRUE; 5954 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 5955 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5956 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 5957 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5958 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 5959 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 5960 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5961 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 5962 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5963 for (jj=0;jj<size_of_constraint;jj++) { 5964 for (ii=0;ii<primal_dofs;ii++) { 5965 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 5966 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 5967 } 5968 } 5969 if (!valid_qr) { 5970 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 5971 for (jj=0;jj<size_of_constraint;jj++) { 5972 for (ii=0;ii<primal_dofs;ii++) { 5973 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 5974 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5975 } 5976 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 5977 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5978 } 5979 } 5980 } 5981 } else { 5982 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 5983 } 5984 } 5985 } else { /* simple transformation block */ 5986 PetscInt row,col; 5987 PetscScalar val,norm; 5988 5989 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5990 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 5991 for (j=0;j<size_of_constraint;j++) { 5992 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 5993 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5994 if (!PetscBTLookup(is_primal,row_B)) { 5995 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 5996 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 5997 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 5998 } else { 5999 for (k=0;k<size_of_constraint;k++) { 6000 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6001 if (row != col) { 6002 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6003 } else { 6004 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6005 } 6006 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6007 } 6008 } 6009 } 6010 if (pcbddc->dbg_flag) { 6011 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6012 } 6013 } 6014 } else { 6015 if (pcbddc->dbg_flag) { 6016 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6017 } 6018 } 6019 } 6020 6021 /* free workspace */ 6022 if (qr_needed) { 6023 if (pcbddc->dbg_flag) { 6024 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6025 } 6026 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6027 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6028 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6029 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6030 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6031 } 6032 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6033 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6034 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6035 6036 /* assembling of global change of variable */ 6037 if (!pcbddc->fake_change) { 6038 Mat tmat; 6039 PetscInt bs; 6040 6041 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6042 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6043 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6044 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6045 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6046 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6047 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6048 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6049 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6050 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6051 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6052 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6053 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6054 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6055 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6056 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6057 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6058 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6059 6060 /* check */ 6061 if (pcbddc->dbg_flag) { 6062 PetscReal error; 6063 Vec x,x_change; 6064 6065 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6066 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6067 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6068 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6069 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6070 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6071 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6072 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6073 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6074 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6075 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6076 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6077 if (error > PETSC_SMALL) { 6078 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6079 } 6080 ierr = VecDestroy(&x);CHKERRQ(ierr); 6081 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6082 } 6083 /* adapt sub_schurs computed (if any) */ 6084 if (pcbddc->use_deluxe_scaling) { 6085 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6086 6087 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 6088 if (sub_schurs && sub_schurs->S_Ej_all) { 6089 Mat S_new,tmat; 6090 IS is_all_N,is_V_Sall = NULL; 6091 6092 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6093 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6094 if (pcbddc->deluxe_zerorows) { 6095 ISLocalToGlobalMapping NtoSall; 6096 IS is_V; 6097 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6098 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6099 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6100 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6101 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6102 } 6103 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6104 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6105 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6106 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6107 if (pcbddc->deluxe_zerorows) { 6108 const PetscScalar *array; 6109 const PetscInt *idxs_V,*idxs_all; 6110 PetscInt i,n_V; 6111 6112 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6113 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6114 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6115 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6116 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6117 for (i=0;i<n_V;i++) { 6118 PetscScalar val; 6119 PetscInt idx; 6120 6121 idx = idxs_V[i]; 6122 val = array[idxs_all[idxs_V[i]]]; 6123 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6124 } 6125 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6126 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6127 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6128 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6129 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6130 } 6131 sub_schurs->S_Ej_all = S_new; 6132 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6133 if (sub_schurs->sum_S_Ej_all) { 6134 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6135 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6136 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6137 if (pcbddc->deluxe_zerorows) { 6138 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6139 } 6140 sub_schurs->sum_S_Ej_all = S_new; 6141 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6142 } 6143 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6144 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6145 } 6146 /* destroy any change of basis context in sub_schurs */ 6147 if (sub_schurs && sub_schurs->change) { 6148 PetscInt i; 6149 6150 for (i=0;i<sub_schurs->n_subs;i++) { 6151 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6152 } 6153 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6154 } 6155 } 6156 if (pcbddc->switch_static) { /* need to save the local change */ 6157 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6158 } else { 6159 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6160 } 6161 /* determine if any process has changed the pressures locally */ 6162 pcbddc->change_interior = pcbddc->benign_have_null; 6163 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6164 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6165 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6166 pcbddc->use_qr_single = qr_needed; 6167 } 6168 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6169 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6170 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6171 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6172 } else { 6173 Mat benign_global = NULL; 6174 if (pcbddc->benign_have_null) { 6175 Mat tmat; 6176 6177 pcbddc->change_interior = PETSC_TRUE; 6178 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6179 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6180 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6181 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6182 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6183 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6184 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6185 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6186 if (pcbddc->benign_change) { 6187 Mat M; 6188 6189 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6190 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6191 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6192 ierr = MatDestroy(&M);CHKERRQ(ierr); 6193 } else { 6194 Mat eye; 6195 PetscScalar *array; 6196 6197 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6198 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6199 for (i=0;i<pcis->n;i++) { 6200 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6201 } 6202 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6203 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6204 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6205 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6206 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6207 } 6208 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6209 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6210 } 6211 if (pcbddc->user_ChangeOfBasisMatrix) { 6212 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6213 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6214 } else if (pcbddc->benign_have_null) { 6215 pcbddc->ChangeOfBasisMatrix = benign_global; 6216 } 6217 } 6218 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6219 IS is_global; 6220 const PetscInt *gidxs; 6221 6222 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6223 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6224 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6225 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6226 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6227 } 6228 } 6229 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6230 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6231 } 6232 6233 if (!pcbddc->fake_change) { 6234 /* add pressure dofs to set of primal nodes for numbering purposes */ 6235 for (i=0;i<pcbddc->benign_n;i++) { 6236 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6237 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6238 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6239 pcbddc->local_primal_size_cc++; 6240 pcbddc->local_primal_size++; 6241 } 6242 6243 /* check if a new primal space has been introduced (also take into account benign trick) */ 6244 pcbddc->new_primal_space_local = PETSC_TRUE; 6245 if (olocal_primal_size == pcbddc->local_primal_size) { 6246 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6247 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6248 if (!pcbddc->new_primal_space_local) { 6249 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6250 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6251 } 6252 } 6253 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6254 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6255 } 6256 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6257 6258 /* flush dbg viewer */ 6259 if (pcbddc->dbg_flag) { 6260 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6261 } 6262 6263 /* free workspace */ 6264 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6265 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6266 if (!pcbddc->adaptive_selection) { 6267 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6268 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6269 } else { 6270 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6271 pcbddc->adaptive_constraints_idxs_ptr, 6272 pcbddc->adaptive_constraints_data_ptr, 6273 pcbddc->adaptive_constraints_idxs, 6274 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6275 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6276 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6277 } 6278 PetscFunctionReturn(0); 6279 } 6280 6281 #undef __FUNCT__ 6282 #define __FUNCT__ "PCBDDCAnalyzeInterface" 6283 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6284 { 6285 ISLocalToGlobalMapping map; 6286 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6287 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6288 PetscInt ierr,i,N; 6289 6290 PetscFunctionBegin; 6291 if (pcbddc->recompute_topography) { 6292 pcbddc->graphanalyzed = PETSC_FALSE; 6293 /* Reset previously computed graph */ 6294 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6295 /* Init local Graph struct */ 6296 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6297 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6298 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6299 6300 /* Check validity of the csr graph passed in by the user */ 6301 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6302 6303 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6304 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 6305 PetscInt *xadj,*adjncy; 6306 PetscInt nvtxs; 6307 PetscBool flg_row=PETSC_FALSE; 6308 6309 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6310 if (flg_row) { 6311 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6312 pcbddc->computed_rowadj = PETSC_TRUE; 6313 } 6314 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6315 } 6316 if (pcbddc->dbg_flag) { 6317 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6318 } 6319 6320 /* Setup of Graph */ 6321 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6322 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6323 6324 /* attach info on disconnected subdomains if present */ 6325 if (pcbddc->n_local_subs) { 6326 PetscInt *local_subs; 6327 6328 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6329 for (i=0;i<pcbddc->n_local_subs;i++) { 6330 const PetscInt *idxs; 6331 PetscInt nl,j; 6332 6333 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6334 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6335 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6336 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6337 } 6338 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6339 pcbddc->mat_graph->local_subs = local_subs; 6340 } 6341 } 6342 6343 if (!pcbddc->graphanalyzed) { 6344 /* Graph's connected components analysis */ 6345 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6346 pcbddc->graphanalyzed = PETSC_TRUE; 6347 } 6348 PetscFunctionReturn(0); 6349 } 6350 6351 #undef __FUNCT__ 6352 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 6353 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6354 { 6355 PetscInt i,j; 6356 PetscScalar *alphas; 6357 PetscErrorCode ierr; 6358 6359 PetscFunctionBegin; 6360 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6361 for (i=0;i<n;i++) { 6362 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6363 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6364 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6365 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6366 } 6367 ierr = PetscFree(alphas);CHKERRQ(ierr); 6368 PetscFunctionReturn(0); 6369 } 6370 6371 #undef __FUNCT__ 6372 #define __FUNCT__ "MatISGetSubassemblingPattern" 6373 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6374 { 6375 Mat A; 6376 PetscInt n_neighs,*neighs,*n_shared,**shared; 6377 PetscMPIInt size,rank,color; 6378 PetscInt *xadj,*adjncy; 6379 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6380 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 6381 PetscInt void_procs,*procs_candidates = NULL; 6382 PetscInt xadj_count, *count; 6383 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6384 PetscSubcomm psubcomm; 6385 MPI_Comm subcomm; 6386 PetscErrorCode ierr; 6387 6388 PetscFunctionBegin; 6389 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6390 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6391 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6392 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6393 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6394 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6395 6396 if (have_void) *have_void = PETSC_FALSE; 6397 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6398 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6399 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6400 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6401 im_active = !!(n); 6402 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6403 void_procs = size - active_procs; 6404 /* get ranks of of non-active processes in mat communicator */ 6405 if (void_procs) { 6406 PetscInt ncand; 6407 6408 if (have_void) *have_void = PETSC_TRUE; 6409 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6410 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6411 for (i=0,ncand=0;i<size;i++) { 6412 if (!procs_candidates[i]) { 6413 procs_candidates[ncand++] = i; 6414 } 6415 } 6416 /* force n_subdomains to be not greater that the number of non-active processes */ 6417 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6418 } 6419 6420 /* number of subdomains requested greater than active processes -> just shift the matrix 6421 number of subdomains requested 1 -> send to master or first candidate in voids */ 6422 if (active_procs < *n_subdomains || *n_subdomains == 1) { 6423 PetscInt issize,isidx,dest; 6424 if (*n_subdomains == 1) dest = 0; 6425 else dest = rank; 6426 if (im_active) { 6427 issize = 1; 6428 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6429 isidx = procs_candidates[dest]; 6430 } else { 6431 isidx = dest; 6432 } 6433 } else { 6434 issize = 0; 6435 isidx = -1; 6436 } 6437 *n_subdomains = active_procs; 6438 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6439 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6440 PetscFunctionReturn(0); 6441 } 6442 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6443 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6444 threshold = PetscMax(threshold,2); 6445 6446 /* Get info on mapping */ 6447 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 6448 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6449 6450 /* build local CSR graph of subdomains' connectivity */ 6451 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6452 xadj[0] = 0; 6453 xadj[1] = PetscMax(n_neighs-1,0); 6454 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6455 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6456 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 6457 for (i=1;i<n_neighs;i++) 6458 for (j=0;j<n_shared[i];j++) 6459 count[shared[i][j]] += 1; 6460 6461 xadj_count = 0; 6462 for (i=1;i<n_neighs;i++) { 6463 for (j=0;j<n_shared[i];j++) { 6464 if (count[shared[i][j]] < threshold) { 6465 adjncy[xadj_count] = neighs[i]; 6466 adjncy_wgt[xadj_count] = n_shared[i]; 6467 xadj_count++; 6468 break; 6469 } 6470 } 6471 } 6472 xadj[1] = xadj_count; 6473 ierr = PetscFree(count);CHKERRQ(ierr); 6474 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6475 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6476 6477 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6478 6479 /* Restrict work on active processes only */ 6480 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6481 if (void_procs) { 6482 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6483 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6484 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6485 subcomm = PetscSubcommChild(psubcomm); 6486 } else { 6487 psubcomm = NULL; 6488 subcomm = PetscObjectComm((PetscObject)mat); 6489 } 6490 6491 v_wgt = NULL; 6492 if (!color) { 6493 ierr = PetscFree(xadj);CHKERRQ(ierr); 6494 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6495 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6496 } else { 6497 Mat subdomain_adj; 6498 IS new_ranks,new_ranks_contig; 6499 MatPartitioning partitioner; 6500 PetscInt rstart=0,rend=0; 6501 PetscInt *is_indices,*oldranks; 6502 PetscMPIInt size; 6503 PetscBool aggregate; 6504 6505 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6506 if (void_procs) { 6507 PetscInt prank = rank; 6508 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6509 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6510 for (i=0;i<xadj[1];i++) { 6511 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6512 } 6513 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6514 } else { 6515 oldranks = NULL; 6516 } 6517 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6518 if (aggregate) { /* TODO: all this part could be made more efficient */ 6519 PetscInt lrows,row,ncols,*cols; 6520 PetscMPIInt nrank; 6521 PetscScalar *vals; 6522 6523 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6524 lrows = 0; 6525 if (nrank<redprocs) { 6526 lrows = size/redprocs; 6527 if (nrank<size%redprocs) lrows++; 6528 } 6529 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6530 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6531 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6532 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6533 row = nrank; 6534 ncols = xadj[1]-xadj[0]; 6535 cols = adjncy; 6536 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6537 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6538 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6539 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6540 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6541 ierr = PetscFree(xadj);CHKERRQ(ierr); 6542 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6543 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6544 ierr = PetscFree(vals);CHKERRQ(ierr); 6545 if (use_vwgt) { 6546 Vec v; 6547 const PetscScalar *array; 6548 PetscInt nl; 6549 6550 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6551 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 6552 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6553 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6554 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6555 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6556 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6557 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6558 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6559 ierr = VecDestroy(&v);CHKERRQ(ierr); 6560 } 6561 } else { 6562 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6563 if (use_vwgt) { 6564 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6565 v_wgt[0] = local_size; 6566 } 6567 } 6568 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6569 6570 /* Partition */ 6571 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6572 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6573 if (v_wgt) { 6574 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6575 } 6576 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6577 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6578 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6579 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6580 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6581 6582 /* renumber new_ranks to avoid "holes" in new set of processors */ 6583 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6584 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6585 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6586 if (!aggregate) { 6587 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6588 #if defined(PETSC_USE_DEBUG) 6589 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6590 #endif 6591 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6592 } else if (oldranks) { 6593 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6594 } else { 6595 ranks_send_to_idx[0] = is_indices[0]; 6596 } 6597 } else { 6598 PetscInt idxs[1]; 6599 PetscMPIInt tag; 6600 MPI_Request *reqs; 6601 6602 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6603 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6604 for (i=rstart;i<rend;i++) { 6605 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6606 } 6607 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6608 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6609 ierr = PetscFree(reqs);CHKERRQ(ierr); 6610 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6611 #if defined(PETSC_USE_DEBUG) 6612 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6613 #endif 6614 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6615 } else if (oldranks) { 6616 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6617 } else { 6618 ranks_send_to_idx[0] = idxs[0]; 6619 } 6620 } 6621 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6622 /* clean up */ 6623 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6624 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6625 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6626 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6627 } 6628 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6629 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6630 6631 /* assemble parallel IS for sends */ 6632 i = 1; 6633 if (!color) i=0; 6634 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6635 PetscFunctionReturn(0); 6636 } 6637 6638 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6639 6640 #undef __FUNCT__ 6641 #define __FUNCT__ "PCBDDCMatISSubassemble" 6642 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6643 { 6644 Mat local_mat; 6645 IS is_sends_internal; 6646 PetscInt rows,cols,new_local_rows; 6647 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6648 PetscBool ismatis,isdense,newisdense,destroy_mat; 6649 ISLocalToGlobalMapping l2gmap; 6650 PetscInt* l2gmap_indices; 6651 const PetscInt* is_indices; 6652 MatType new_local_type; 6653 /* buffers */ 6654 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6655 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6656 PetscInt *recv_buffer_idxs_local; 6657 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6658 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6659 /* MPI */ 6660 MPI_Comm comm,comm_n; 6661 PetscSubcomm subcomm; 6662 PetscMPIInt n_sends,n_recvs,commsize; 6663 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6664 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6665 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6666 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6667 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6668 PetscErrorCode ierr; 6669 6670 PetscFunctionBegin; 6671 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6672 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6673 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6674 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6675 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6676 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6677 PetscValidLogicalCollectiveBool(mat,reuse,6); 6678 PetscValidLogicalCollectiveInt(mat,nis,8); 6679 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6680 if (nvecs) { 6681 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6682 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6683 } 6684 /* further checks */ 6685 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6686 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6687 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6688 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6689 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6690 if (reuse && *mat_n) { 6691 PetscInt mrows,mcols,mnrows,mncols; 6692 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6693 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6694 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6695 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6696 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6697 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6698 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6699 } 6700 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6701 PetscValidLogicalCollectiveInt(mat,bs,0); 6702 6703 /* prepare IS for sending if not provided */ 6704 if (!is_sends) { 6705 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6706 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6707 } else { 6708 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6709 is_sends_internal = is_sends; 6710 } 6711 6712 /* get comm */ 6713 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6714 6715 /* compute number of sends */ 6716 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6717 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6718 6719 /* compute number of receives */ 6720 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6721 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6722 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6723 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6724 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6725 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6726 ierr = PetscFree(iflags);CHKERRQ(ierr); 6727 6728 /* restrict comm if requested */ 6729 subcomm = 0; 6730 destroy_mat = PETSC_FALSE; 6731 if (restrict_comm) { 6732 PetscMPIInt color,subcommsize; 6733 6734 color = 0; 6735 if (restrict_full) { 6736 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6737 } else { 6738 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6739 } 6740 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6741 subcommsize = commsize - subcommsize; 6742 /* check if reuse has been requested */ 6743 if (reuse) { 6744 if (*mat_n) { 6745 PetscMPIInt subcommsize2; 6746 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6747 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6748 comm_n = PetscObjectComm((PetscObject)*mat_n); 6749 } else { 6750 comm_n = PETSC_COMM_SELF; 6751 } 6752 } else { /* MAT_INITIAL_MATRIX */ 6753 PetscMPIInt rank; 6754 6755 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6756 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6757 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6758 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6759 comm_n = PetscSubcommChild(subcomm); 6760 } 6761 /* flag to destroy *mat_n if not significative */ 6762 if (color) destroy_mat = PETSC_TRUE; 6763 } else { 6764 comm_n = comm; 6765 } 6766 6767 /* prepare send/receive buffers */ 6768 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6769 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6770 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6771 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6772 if (nis) { 6773 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6774 } 6775 6776 /* Get data from local matrices */ 6777 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6778 /* TODO: See below some guidelines on how to prepare the local buffers */ 6779 /* 6780 send_buffer_vals should contain the raw values of the local matrix 6781 send_buffer_idxs should contain: 6782 - MatType_PRIVATE type 6783 - PetscInt size_of_l2gmap 6784 - PetscInt global_row_indices[size_of_l2gmap] 6785 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6786 */ 6787 else { 6788 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6789 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6790 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6791 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6792 send_buffer_idxs[1] = i; 6793 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6794 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6795 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6796 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6797 for (i=0;i<n_sends;i++) { 6798 ilengths_vals[is_indices[i]] = len*len; 6799 ilengths_idxs[is_indices[i]] = len+2; 6800 } 6801 } 6802 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6803 /* additional is (if any) */ 6804 if (nis) { 6805 PetscMPIInt psum; 6806 PetscInt j; 6807 for (j=0,psum=0;j<nis;j++) { 6808 PetscInt plen; 6809 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6810 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6811 psum += len+1; /* indices + lenght */ 6812 } 6813 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6814 for (j=0,psum=0;j<nis;j++) { 6815 PetscInt plen; 6816 const PetscInt *is_array_idxs; 6817 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6818 send_buffer_idxs_is[psum] = plen; 6819 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6820 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6821 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6822 psum += plen+1; /* indices + lenght */ 6823 } 6824 for (i=0;i<n_sends;i++) { 6825 ilengths_idxs_is[is_indices[i]] = psum; 6826 } 6827 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6828 } 6829 6830 buf_size_idxs = 0; 6831 buf_size_vals = 0; 6832 buf_size_idxs_is = 0; 6833 buf_size_vecs = 0; 6834 for (i=0;i<n_recvs;i++) { 6835 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6836 buf_size_vals += (PetscInt)olengths_vals[i]; 6837 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6838 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6839 } 6840 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6841 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6842 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6843 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6844 6845 /* get new tags for clean communications */ 6846 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6847 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6848 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6849 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6850 6851 /* allocate for requests */ 6852 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6853 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6854 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6855 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6856 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6857 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6858 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6859 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6860 6861 /* communications */ 6862 ptr_idxs = recv_buffer_idxs; 6863 ptr_vals = recv_buffer_vals; 6864 ptr_idxs_is = recv_buffer_idxs_is; 6865 ptr_vecs = recv_buffer_vecs; 6866 for (i=0;i<n_recvs;i++) { 6867 source_dest = onodes[i]; 6868 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6869 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6870 ptr_idxs += olengths_idxs[i]; 6871 ptr_vals += olengths_vals[i]; 6872 if (nis) { 6873 source_dest = onodes_is[i]; 6874 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6875 ptr_idxs_is += olengths_idxs_is[i]; 6876 } 6877 if (nvecs) { 6878 source_dest = onodes[i]; 6879 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6880 ptr_vecs += olengths_idxs[i]-2; 6881 } 6882 } 6883 for (i=0;i<n_sends;i++) { 6884 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6885 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6886 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6887 if (nis) { 6888 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6889 } 6890 if (nvecs) { 6891 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6892 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6893 } 6894 } 6895 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6896 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6897 6898 /* assemble new l2g map */ 6899 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6900 ptr_idxs = recv_buffer_idxs; 6901 new_local_rows = 0; 6902 for (i=0;i<n_recvs;i++) { 6903 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6904 ptr_idxs += olengths_idxs[i]; 6905 } 6906 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6907 ptr_idxs = recv_buffer_idxs; 6908 new_local_rows = 0; 6909 for (i=0;i<n_recvs;i++) { 6910 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6911 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6912 ptr_idxs += olengths_idxs[i]; 6913 } 6914 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 6915 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 6916 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 6917 6918 /* infer new local matrix type from received local matrices type */ 6919 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 6920 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 6921 if (n_recvs) { 6922 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 6923 ptr_idxs = recv_buffer_idxs; 6924 for (i=0;i<n_recvs;i++) { 6925 if ((PetscInt)new_local_type_private != *ptr_idxs) { 6926 new_local_type_private = MATAIJ_PRIVATE; 6927 break; 6928 } 6929 ptr_idxs += olengths_idxs[i]; 6930 } 6931 switch (new_local_type_private) { 6932 case MATDENSE_PRIVATE: 6933 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 6934 new_local_type = MATSEQAIJ; 6935 bs = 1; 6936 } else { /* if I receive only 1 dense matrix */ 6937 new_local_type = MATSEQDENSE; 6938 bs = 1; 6939 } 6940 break; 6941 case MATAIJ_PRIVATE: 6942 new_local_type = MATSEQAIJ; 6943 bs = 1; 6944 break; 6945 case MATBAIJ_PRIVATE: 6946 new_local_type = MATSEQBAIJ; 6947 break; 6948 case MATSBAIJ_PRIVATE: 6949 new_local_type = MATSEQSBAIJ; 6950 break; 6951 default: 6952 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 6953 break; 6954 } 6955 } else { /* by default, new_local_type is seqdense */ 6956 new_local_type = MATSEQDENSE; 6957 bs = 1; 6958 } 6959 6960 /* create MATIS object if needed */ 6961 if (!reuse) { 6962 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 6963 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6964 } else { 6965 /* it also destroys the local matrices */ 6966 if (*mat_n) { 6967 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 6968 } else { /* this is a fake object */ 6969 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6970 } 6971 } 6972 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 6973 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 6974 6975 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6976 6977 /* Global to local map of received indices */ 6978 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 6979 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 6980 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 6981 6982 /* restore attributes -> type of incoming data and its size */ 6983 buf_size_idxs = 0; 6984 for (i=0;i<n_recvs;i++) { 6985 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 6986 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 6987 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6988 } 6989 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 6990 6991 /* set preallocation */ 6992 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 6993 if (!newisdense) { 6994 PetscInt *new_local_nnz=0; 6995 6996 ptr_idxs = recv_buffer_idxs_local; 6997 if (n_recvs) { 6998 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 6999 } 7000 for (i=0;i<n_recvs;i++) { 7001 PetscInt j; 7002 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7003 for (j=0;j<*(ptr_idxs+1);j++) { 7004 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7005 } 7006 } else { 7007 /* TODO */ 7008 } 7009 ptr_idxs += olengths_idxs[i]; 7010 } 7011 if (new_local_nnz) { 7012 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7013 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7014 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7015 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7016 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7017 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7018 } else { 7019 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7020 } 7021 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7022 } else { 7023 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7024 } 7025 7026 /* set values */ 7027 ptr_vals = recv_buffer_vals; 7028 ptr_idxs = recv_buffer_idxs_local; 7029 for (i=0;i<n_recvs;i++) { 7030 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7031 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7032 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7033 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7034 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7035 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7036 } else { 7037 /* TODO */ 7038 } 7039 ptr_idxs += olengths_idxs[i]; 7040 ptr_vals += olengths_vals[i]; 7041 } 7042 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7043 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7044 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7045 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7046 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7047 7048 #if 0 7049 if (!restrict_comm) { /* check */ 7050 Vec lvec,rvec; 7051 PetscReal infty_error; 7052 7053 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7054 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7055 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7056 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7057 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7058 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7059 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7060 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7061 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7062 } 7063 #endif 7064 7065 /* assemble new additional is (if any) */ 7066 if (nis) { 7067 PetscInt **temp_idxs,*count_is,j,psum; 7068 7069 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7070 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7071 ptr_idxs = recv_buffer_idxs_is; 7072 psum = 0; 7073 for (i=0;i<n_recvs;i++) { 7074 for (j=0;j<nis;j++) { 7075 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7076 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7077 psum += plen; 7078 ptr_idxs += plen+1; /* shift pointer to received data */ 7079 } 7080 } 7081 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7082 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7083 for (i=1;i<nis;i++) { 7084 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7085 } 7086 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7087 ptr_idxs = recv_buffer_idxs_is; 7088 for (i=0;i<n_recvs;i++) { 7089 for (j=0;j<nis;j++) { 7090 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7091 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7092 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7093 ptr_idxs += plen+1; /* shift pointer to received data */ 7094 } 7095 } 7096 for (i=0;i<nis;i++) { 7097 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7098 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7099 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7100 } 7101 ierr = PetscFree(count_is);CHKERRQ(ierr); 7102 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7103 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7104 } 7105 /* free workspace */ 7106 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7107 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7108 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7109 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7110 if (isdense) { 7111 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7112 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7113 } else { 7114 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7115 } 7116 if (nis) { 7117 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7118 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7119 } 7120 7121 if (nvecs) { 7122 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7123 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7124 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7125 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7126 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7127 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7128 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7129 /* set values */ 7130 ptr_vals = recv_buffer_vecs; 7131 ptr_idxs = recv_buffer_idxs_local; 7132 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7133 for (i=0;i<n_recvs;i++) { 7134 PetscInt j; 7135 for (j=0;j<*(ptr_idxs+1);j++) { 7136 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7137 } 7138 ptr_idxs += olengths_idxs[i]; 7139 ptr_vals += olengths_idxs[i]-2; 7140 } 7141 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7142 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7143 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7144 } 7145 7146 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7147 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7148 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7149 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7150 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7151 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7152 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7153 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7154 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7155 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7156 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7157 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7158 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7159 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7160 ierr = PetscFree(onodes);CHKERRQ(ierr); 7161 if (nis) { 7162 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7163 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7164 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7165 } 7166 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7167 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7168 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7169 for (i=0;i<nis;i++) { 7170 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7171 } 7172 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7173 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7174 } 7175 *mat_n = NULL; 7176 } 7177 PetscFunctionReturn(0); 7178 } 7179 7180 /* temporary hack into ksp private data structure */ 7181 #include <petsc/private/kspimpl.h> 7182 7183 #undef __FUNCT__ 7184 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 7185 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7186 { 7187 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7188 PC_IS *pcis = (PC_IS*)pc->data; 7189 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7190 Mat coarsedivudotp = NULL; 7191 Mat coarseG,t_coarse_mat_is; 7192 MatNullSpace CoarseNullSpace = NULL; 7193 ISLocalToGlobalMapping coarse_islg; 7194 IS coarse_is,*isarray; 7195 PetscInt i,im_active=-1,active_procs=-1; 7196 PetscInt nis,nisdofs,nisneu,nisvert; 7197 PC pc_temp; 7198 PCType coarse_pc_type; 7199 KSPType coarse_ksp_type; 7200 PetscBool multilevel_requested,multilevel_allowed; 7201 PetscBool isredundant,isbddc,isnn,coarse_reuse; 7202 PetscInt ncoarse,nedcfield; 7203 PetscBool compute_vecs = PETSC_FALSE; 7204 PetscScalar *array; 7205 MatReuse coarse_mat_reuse; 7206 PetscBool restr, full_restr, have_void; 7207 PetscErrorCode ierr; 7208 7209 PetscFunctionBegin; 7210 /* Assign global numbering to coarse dofs */ 7211 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7212 PetscInt ocoarse_size; 7213 compute_vecs = PETSC_TRUE; 7214 ocoarse_size = pcbddc->coarse_size; 7215 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7216 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7217 /* see if we can avoid some work */ 7218 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7219 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7220 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7221 PC pc; 7222 PetscBool isbddc; 7223 7224 /* temporary workaround since PCBDDC does not have a reset method so far */ 7225 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 7226 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 7227 if (isbddc) { 7228 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 7229 } else { 7230 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7231 } 7232 coarse_reuse = PETSC_FALSE; 7233 } else { /* we can safely reuse already computed coarse matrix */ 7234 coarse_reuse = PETSC_TRUE; 7235 } 7236 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7237 coarse_reuse = PETSC_FALSE; 7238 } 7239 /* reset any subassembling information */ 7240 if (!coarse_reuse || pcbddc->recompute_topography) { 7241 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7242 } 7243 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7244 coarse_reuse = PETSC_TRUE; 7245 } 7246 /* assemble coarse matrix */ 7247 if (coarse_reuse && pcbddc->coarse_ksp) { 7248 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7249 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7250 coarse_mat_reuse = MAT_REUSE_MATRIX; 7251 } else { 7252 coarse_mat = NULL; 7253 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7254 } 7255 7256 /* creates temporary l2gmap and IS for coarse indexes */ 7257 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7258 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7259 7260 /* creates temporary MATIS object for coarse matrix */ 7261 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7262 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7263 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7264 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7265 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7266 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7267 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7268 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7269 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7270 7271 /* count "active" (i.e. with positive local size) and "void" processes */ 7272 im_active = !!(pcis->n); 7273 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7274 7275 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7276 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7277 /* full_restr : just use the receivers from the subassembling pattern */ 7278 coarse_mat_is = NULL; 7279 multilevel_allowed = PETSC_FALSE; 7280 multilevel_requested = PETSC_FALSE; 7281 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7282 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7283 if (multilevel_requested) { 7284 ncoarse = active_procs/pcbddc->coarsening_ratio; 7285 restr = PETSC_FALSE; 7286 full_restr = PETSC_FALSE; 7287 } else { 7288 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7289 restr = PETSC_TRUE; 7290 full_restr = PETSC_TRUE; 7291 } 7292 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7293 ncoarse = PetscMax(1,ncoarse); 7294 if (!pcbddc->coarse_subassembling) { 7295 if (pcbddc->coarsening_ratio > 1) { 7296 ierr = MatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7297 } else { 7298 PetscMPIInt size,rank; 7299 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 7300 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7301 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 7302 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7303 } 7304 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7305 PetscInt psum; 7306 PetscMPIInt size; 7307 if (pcbddc->coarse_ksp) psum = 1; 7308 else psum = 0; 7309 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7310 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 7311 if (ncoarse < size) have_void = PETSC_TRUE; 7312 } 7313 /* determine if we can go multilevel */ 7314 if (multilevel_requested) { 7315 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7316 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7317 } 7318 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7319 7320 /* dump subassembling pattern */ 7321 if (pcbddc->dbg_flag && multilevel_allowed) { 7322 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7323 } 7324 7325 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7326 nedcfield = -1; 7327 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7328 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7329 const PetscInt *idxs; 7330 ISLocalToGlobalMapping tmap; 7331 7332 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7333 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7334 /* allocate space for temporary storage */ 7335 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7336 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7337 /* allocate for IS array */ 7338 nisdofs = pcbddc->n_ISForDofsLocal; 7339 if (pcbddc->nedclocal) { 7340 if (pcbddc->nedfield > -1) { 7341 nedcfield = pcbddc->nedfield; 7342 } else { 7343 nedcfield = 0; 7344 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7345 nisdofs = 1; 7346 } 7347 } 7348 nisneu = !!pcbddc->NeumannBoundariesLocal; 7349 nisvert = 0; /* nisvert is not used */ 7350 nis = nisdofs + nisneu + nisvert; 7351 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7352 /* dofs splitting */ 7353 for (i=0;i<nisdofs;i++) { 7354 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7355 if (nedcfield != i) { 7356 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7357 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7358 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7359 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7360 } else { 7361 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7362 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7363 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7364 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7365 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7366 } 7367 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7368 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7369 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7370 } 7371 /* neumann boundaries */ 7372 if (pcbddc->NeumannBoundariesLocal) { 7373 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7374 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7375 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7376 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7377 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7378 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7379 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7380 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7381 } 7382 /* free memory */ 7383 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7384 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7385 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7386 } else { 7387 nis = 0; 7388 nisdofs = 0; 7389 nisneu = 0; 7390 nisvert = 0; 7391 isarray = NULL; 7392 } 7393 /* destroy no longer needed map */ 7394 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7395 7396 /* subassemble */ 7397 if (multilevel_allowed) { 7398 Vec vp[1]; 7399 PetscInt nvecs = 0; 7400 PetscBool reuse,reuser; 7401 7402 if (coarse_mat) reuse = PETSC_TRUE; 7403 else reuse = PETSC_FALSE; 7404 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7405 vp[0] = NULL; 7406 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7407 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7408 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7409 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7410 nvecs = 1; 7411 7412 if (pcbddc->divudotp) { 7413 Mat B,loc_divudotp; 7414 Vec v,p; 7415 IS dummy; 7416 PetscInt np; 7417 7418 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7419 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7420 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7421 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7422 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7423 ierr = VecSet(p,1.);CHKERRQ(ierr); 7424 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7425 ierr = VecDestroy(&p);CHKERRQ(ierr); 7426 ierr = MatDestroy(&B);CHKERRQ(ierr); 7427 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7428 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7429 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7430 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7431 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7432 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7433 ierr = VecDestroy(&v);CHKERRQ(ierr); 7434 } 7435 } 7436 if (reuser) { 7437 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7438 } else { 7439 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7440 } 7441 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7442 PetscScalar *arraym,*arrayv; 7443 PetscInt nl; 7444 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7445 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7446 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7447 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7448 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7449 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7450 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7451 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7452 } else { 7453 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7454 } 7455 } else { 7456 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7457 } 7458 if (coarse_mat_is || coarse_mat) { 7459 PetscMPIInt size; 7460 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7461 if (!multilevel_allowed) { 7462 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7463 } else { 7464 Mat A; 7465 7466 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7467 if (coarse_mat_is) { 7468 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7469 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7470 coarse_mat = coarse_mat_is; 7471 } 7472 /* be sure we don't have MatSeqDENSE as local mat */ 7473 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7474 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7475 } 7476 } 7477 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7478 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7479 7480 /* create local to global scatters for coarse problem */ 7481 if (compute_vecs) { 7482 PetscInt lrows; 7483 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7484 if (coarse_mat) { 7485 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7486 } else { 7487 lrows = 0; 7488 } 7489 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7490 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7491 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7492 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7493 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7494 } 7495 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7496 7497 /* set defaults for coarse KSP and PC */ 7498 if (multilevel_allowed) { 7499 coarse_ksp_type = KSPRICHARDSON; 7500 coarse_pc_type = PCBDDC; 7501 } else { 7502 coarse_ksp_type = KSPPREONLY; 7503 coarse_pc_type = PCREDUNDANT; 7504 } 7505 7506 /* print some info if requested */ 7507 if (pcbddc->dbg_flag) { 7508 if (!multilevel_allowed) { 7509 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7510 if (multilevel_requested) { 7511 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7512 } else if (pcbddc->max_levels) { 7513 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7514 } 7515 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7516 } 7517 } 7518 7519 /* communicate coarse discrete gradient */ 7520 coarseG = NULL; 7521 if (pcbddc->nedcG && multilevel_allowed) { 7522 MPI_Comm ccomm; 7523 if (coarse_mat) { 7524 ccomm = PetscObjectComm((PetscObject)coarse_mat); 7525 } else { 7526 ccomm = MPI_COMM_NULL; 7527 } 7528 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 7529 } 7530 7531 /* create the coarse KSP object only once with defaults */ 7532 if (coarse_mat) { 7533 PetscViewer dbg_viewer = NULL; 7534 if (pcbddc->dbg_flag) { 7535 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7536 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7537 } 7538 if (!pcbddc->coarse_ksp) { 7539 char prefix[256],str_level[16]; 7540 size_t len; 7541 7542 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7543 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7544 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7545 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7546 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7547 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7548 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7549 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7550 /* TODO is this logic correct? should check for coarse_mat type */ 7551 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7552 /* prefix */ 7553 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7554 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7555 if (!pcbddc->current_level) { 7556 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7557 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7558 } else { 7559 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7560 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7561 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7562 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7563 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7564 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7565 } 7566 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7567 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7568 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7569 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7570 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7571 /* allow user customization */ 7572 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7573 } 7574 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7575 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7576 if (nisdofs) { 7577 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7578 for (i=0;i<nisdofs;i++) { 7579 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7580 } 7581 } 7582 if (nisneu) { 7583 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7584 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7585 } 7586 if (nisvert) { 7587 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7588 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7589 } 7590 if (coarseG) { 7591 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 7592 } 7593 7594 /* get some info after set from options */ 7595 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7596 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7597 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7598 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 7599 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7600 isbddc = PETSC_FALSE; 7601 } 7602 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7603 if (isredundant) { 7604 KSP inner_ksp; 7605 PC inner_pc; 7606 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7607 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7608 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7609 } 7610 7611 /* parameters which miss an API */ 7612 if (isbddc) { 7613 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7614 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7615 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7616 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7617 if (pcbddc_coarse->benign_saddle_point) { 7618 Mat coarsedivudotp_is; 7619 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7620 IS row,col; 7621 const PetscInt *gidxs; 7622 PetscInt n,st,M,N; 7623 7624 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7625 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7626 st = st-n; 7627 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7628 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7629 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7630 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7631 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7632 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7633 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7634 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7635 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7636 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7637 ierr = ISDestroy(&row);CHKERRQ(ierr); 7638 ierr = ISDestroy(&col);CHKERRQ(ierr); 7639 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7640 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7641 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7642 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7643 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7644 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7645 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7646 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7647 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7648 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7649 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7650 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7651 } 7652 } 7653 7654 /* propagate symmetry info of coarse matrix */ 7655 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7656 if (pc->pmat->symmetric_set) { 7657 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7658 } 7659 if (pc->pmat->hermitian_set) { 7660 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7661 } 7662 if (pc->pmat->spd_set) { 7663 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7664 } 7665 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7666 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7667 } 7668 /* set operators */ 7669 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7670 if (pcbddc->dbg_flag) { 7671 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7672 } 7673 } 7674 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 7675 ierr = PetscFree(isarray);CHKERRQ(ierr); 7676 #if 0 7677 { 7678 PetscViewer viewer; 7679 char filename[256]; 7680 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7681 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7682 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7683 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7684 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7685 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7686 } 7687 #endif 7688 7689 if (pcbddc->coarse_ksp) { 7690 Vec crhs,csol; 7691 7692 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7693 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7694 if (!csol) { 7695 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7696 } 7697 if (!crhs) { 7698 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7699 } 7700 } 7701 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7702 7703 /* compute null space for coarse solver if the benign trick has been requested */ 7704 if (pcbddc->benign_null) { 7705 7706 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7707 for (i=0;i<pcbddc->benign_n;i++) { 7708 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7709 } 7710 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7711 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7712 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7713 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7714 if (coarse_mat) { 7715 Vec nullv; 7716 PetscScalar *array,*array2; 7717 PetscInt nl; 7718 7719 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7720 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7721 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7722 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7723 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7724 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7725 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7726 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7727 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7728 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7729 } 7730 } 7731 7732 if (pcbddc->coarse_ksp) { 7733 PetscBool ispreonly; 7734 7735 if (CoarseNullSpace) { 7736 PetscBool isnull; 7737 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7738 if (isnull) { 7739 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7740 } 7741 /* TODO: add local nullspaces (if any) */ 7742 } 7743 /* setup coarse ksp */ 7744 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7745 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7746 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7747 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7748 KSP check_ksp; 7749 KSPType check_ksp_type; 7750 PC check_pc; 7751 Vec check_vec,coarse_vec; 7752 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7753 PetscInt its; 7754 PetscBool compute_eigs; 7755 PetscReal *eigs_r,*eigs_c; 7756 PetscInt neigs; 7757 const char *prefix; 7758 7759 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7760 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7761 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7762 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7763 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7764 /* prevent from setup unneeded object */ 7765 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7766 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7767 if (ispreonly) { 7768 check_ksp_type = KSPPREONLY; 7769 compute_eigs = PETSC_FALSE; 7770 } else { 7771 check_ksp_type = KSPGMRES; 7772 compute_eigs = PETSC_TRUE; 7773 } 7774 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7775 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7776 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7777 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7778 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7779 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7780 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7781 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7782 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7783 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7784 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7785 /* create random vec */ 7786 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7787 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7788 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7789 /* solve coarse problem */ 7790 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7791 /* set eigenvalue estimation if preonly has not been requested */ 7792 if (compute_eigs) { 7793 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7794 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7795 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7796 if (neigs) { 7797 lambda_max = eigs_r[neigs-1]; 7798 lambda_min = eigs_r[0]; 7799 if (pcbddc->use_coarse_estimates) { 7800 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7801 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7802 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7803 } 7804 } 7805 } 7806 } 7807 7808 /* check coarse problem residual error */ 7809 if (pcbddc->dbg_flag) { 7810 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7811 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7812 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7813 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7814 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7815 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7816 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7817 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7818 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7819 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7820 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7821 if (CoarseNullSpace) { 7822 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7823 } 7824 if (compute_eigs) { 7825 PetscReal lambda_max_s,lambda_min_s; 7826 KSPConvergedReason reason; 7827 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7828 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7829 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 7830 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7831 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7832 for (i=0;i<neigs;i++) { 7833 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7834 } 7835 } 7836 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7837 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7838 } 7839 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7840 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7841 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7842 if (compute_eigs) { 7843 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7844 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7845 } 7846 } 7847 } 7848 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7849 /* print additional info */ 7850 if (pcbddc->dbg_flag) { 7851 /* waits until all processes reaches this point */ 7852 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7853 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7854 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7855 } 7856 7857 /* free memory */ 7858 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7859 PetscFunctionReturn(0); 7860 } 7861 7862 #undef __FUNCT__ 7863 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 7864 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7865 { 7866 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7867 PC_IS* pcis = (PC_IS*)pc->data; 7868 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7869 IS subset,subset_mult,subset_n; 7870 PetscInt local_size,coarse_size=0; 7871 PetscInt *local_primal_indices=NULL; 7872 const PetscInt *t_local_primal_indices; 7873 PetscErrorCode ierr; 7874 7875 PetscFunctionBegin; 7876 /* Compute global number of coarse dofs */ 7877 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7878 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7879 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7880 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7881 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7882 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7883 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7884 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7885 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7886 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7887 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7888 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7889 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7890 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7891 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7892 7893 /* check numbering */ 7894 if (pcbddc->dbg_flag) { 7895 PetscScalar coarsesum,*array,*array2; 7896 PetscInt i; 7897 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7898 7899 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7900 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7901 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7902 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7903 /* counter */ 7904 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7905 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7906 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7907 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7908 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7909 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7910 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7911 for (i=0;i<pcbddc->local_primal_size;i++) { 7912 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7913 } 7914 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7915 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7916 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7917 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7918 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7919 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7920 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7921 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7922 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7923 for (i=0;i<pcis->n;i++) { 7924 if (array[i] != 0.0 && array[i] != array2[i]) { 7925 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 7926 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 7927 set_error = PETSC_TRUE; 7928 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 7929 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 7930 } 7931 } 7932 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7933 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7934 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7935 for (i=0;i<pcis->n;i++) { 7936 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 7937 } 7938 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7939 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7940 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7941 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7942 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 7943 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 7944 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 7945 PetscInt *gidxs; 7946 7947 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 7948 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 7949 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 7950 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7951 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 7952 for (i=0;i<pcbddc->local_primal_size;i++) { 7953 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 7954 } 7955 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7956 ierr = PetscFree(gidxs);CHKERRQ(ierr); 7957 } 7958 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7959 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7960 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 7961 } 7962 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 7963 /* get back data */ 7964 *coarse_size_n = coarse_size; 7965 *local_primal_indices_n = local_primal_indices; 7966 PetscFunctionReturn(0); 7967 } 7968 7969 #undef __FUNCT__ 7970 #define __FUNCT__ "PCBDDCGlobalToLocal" 7971 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 7972 { 7973 IS localis_t; 7974 PetscInt i,lsize,*idxs,n; 7975 PetscScalar *vals; 7976 PetscErrorCode ierr; 7977 7978 PetscFunctionBegin; 7979 /* get indices in local ordering exploiting local to global map */ 7980 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 7981 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 7982 for (i=0;i<lsize;i++) vals[i] = 1.0; 7983 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7984 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 7985 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 7986 if (idxs) { /* multilevel guard */ 7987 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 7988 } 7989 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 7990 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7991 ierr = PetscFree(vals);CHKERRQ(ierr); 7992 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 7993 /* now compute set in local ordering */ 7994 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7995 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7996 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7997 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 7998 for (i=0,lsize=0;i<n;i++) { 7999 if (PetscRealPart(vals[i]) > 0.5) { 8000 lsize++; 8001 } 8002 } 8003 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8004 for (i=0,lsize=0;i<n;i++) { 8005 if (PetscRealPart(vals[i]) > 0.5) { 8006 idxs[lsize++] = i; 8007 } 8008 } 8009 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8010 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8011 *localis = localis_t; 8012 PetscFunctionReturn(0); 8013 } 8014 8015 #undef __FUNCT__ 8016 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 8017 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8018 { 8019 PC_IS *pcis=(PC_IS*)pc->data; 8020 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8021 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8022 Mat S_j; 8023 PetscInt *used_xadj,*used_adjncy; 8024 PetscBool free_used_adj; 8025 PetscErrorCode ierr; 8026 8027 PetscFunctionBegin; 8028 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8029 free_used_adj = PETSC_FALSE; 8030 if (pcbddc->sub_schurs_layers == -1) { 8031 used_xadj = NULL; 8032 used_adjncy = NULL; 8033 } else { 8034 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8035 used_xadj = pcbddc->mat_graph->xadj; 8036 used_adjncy = pcbddc->mat_graph->adjncy; 8037 } else if (pcbddc->computed_rowadj) { 8038 used_xadj = pcbddc->mat_graph->xadj; 8039 used_adjncy = pcbddc->mat_graph->adjncy; 8040 } else { 8041 PetscBool flg_row=PETSC_FALSE; 8042 const PetscInt *xadj,*adjncy; 8043 PetscInt nvtxs; 8044 8045 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8046 if (flg_row) { 8047 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8048 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8049 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8050 free_used_adj = PETSC_TRUE; 8051 } else { 8052 pcbddc->sub_schurs_layers = -1; 8053 used_xadj = NULL; 8054 used_adjncy = NULL; 8055 } 8056 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8057 } 8058 } 8059 8060 /* setup sub_schurs data */ 8061 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8062 if (!sub_schurs->schur_explicit) { 8063 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8064 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8065 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8066 } else { 8067 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8068 PetscBool isseqaij,need_change = PETSC_FALSE; 8069 PetscInt benign_n; 8070 Mat change = NULL; 8071 Vec scaling = NULL; 8072 IS change_primal = NULL; 8073 8074 if (!pcbddc->use_vertices && reuse_solvers) { 8075 PetscInt n_vertices; 8076 8077 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8078 reuse_solvers = (PetscBool)!n_vertices; 8079 } 8080 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8081 if (!isseqaij) { 8082 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8083 if (matis->A == pcbddc->local_mat) { 8084 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8085 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8086 } else { 8087 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8088 } 8089 } 8090 if (!pcbddc->benign_change_explicit) { 8091 benign_n = pcbddc->benign_n; 8092 } else { 8093 benign_n = 0; 8094 } 8095 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8096 We need a global reduction to avoid possible deadlocks. 8097 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8098 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8099 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8100 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8101 need_change = (PetscBool)(!need_change); 8102 } 8103 /* If the user defines additional constraints, we import them here. 8104 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8105 if (need_change) { 8106 PC_IS *pcisf; 8107 PC_BDDC *pcbddcf; 8108 PC pcf; 8109 8110 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8111 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8112 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8113 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8114 /* hacks */ 8115 pcisf = (PC_IS*)pcf->data; 8116 pcisf->is_B_local = pcis->is_B_local; 8117 pcisf->vec1_N = pcis->vec1_N; 8118 pcisf->BtoNmap = pcis->BtoNmap; 8119 pcisf->n = pcis->n; 8120 pcisf->n_B = pcis->n_B; 8121 pcbddcf = (PC_BDDC*)pcf->data; 8122 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8123 pcbddcf->mat_graph = pcbddc->mat_graph; 8124 pcbddcf->use_faces = PETSC_TRUE; 8125 pcbddcf->use_change_of_basis = PETSC_TRUE; 8126 pcbddcf->use_change_on_faces = PETSC_TRUE; 8127 pcbddcf->use_qr_single = PETSC_TRUE; 8128 pcbddcf->fake_change = PETSC_TRUE; 8129 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8130 /* store information on primal vertices and change of basis (in local numbering) */ 8131 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8132 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8133 change = pcbddcf->ConstraintMatrix; 8134 pcbddcf->ConstraintMatrix = NULL; 8135 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8136 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8137 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8138 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8139 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8140 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8141 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8142 pcf->ops->destroy = NULL; 8143 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8144 } 8145 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8146 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8147 ierr = MatDestroy(&change);CHKERRQ(ierr); 8148 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8149 } 8150 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8151 8152 /* free adjacency */ 8153 if (free_used_adj) { 8154 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8155 } 8156 PetscFunctionReturn(0); 8157 } 8158 8159 #undef __FUNCT__ 8160 #define __FUNCT__ "PCBDDCInitSubSchurs" 8161 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8162 { 8163 PC_IS *pcis=(PC_IS*)pc->data; 8164 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8165 PCBDDCGraph graph; 8166 PetscErrorCode ierr; 8167 8168 PetscFunctionBegin; 8169 /* attach interface graph for determining subsets */ 8170 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8171 IS verticesIS,verticescomm; 8172 PetscInt vsize,*idxs; 8173 8174 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8175 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8176 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8177 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8178 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8179 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8180 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8181 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8182 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8183 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8184 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8185 } else { 8186 graph = pcbddc->mat_graph; 8187 } 8188 /* print some info */ 8189 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8190 IS vertices; 8191 PetscInt nv,nedges,nfaces; 8192 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8193 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8194 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8195 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8196 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8197 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8198 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8199 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8200 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8201 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8202 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8203 } 8204 8205 /* sub_schurs init */ 8206 if (!pcbddc->sub_schurs) { 8207 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8208 } 8209 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8210 8211 /* free graph struct */ 8212 if (pcbddc->sub_schurs_rebuild) { 8213 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8214 } 8215 PetscFunctionReturn(0); 8216 } 8217 8218 #undef __FUNCT__ 8219 #define __FUNCT__ "PCBDDCCheckOperator" 8220 PetscErrorCode PCBDDCCheckOperator(PC pc) 8221 { 8222 PC_IS *pcis=(PC_IS*)pc->data; 8223 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8224 PetscErrorCode ierr; 8225 8226 PetscFunctionBegin; 8227 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8228 IS zerodiag = NULL; 8229 Mat S_j,B0_B=NULL; 8230 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8231 PetscScalar *p0_check,*array,*array2; 8232 PetscReal norm; 8233 PetscInt i; 8234 8235 /* B0 and B0_B */ 8236 if (zerodiag) { 8237 IS dummy; 8238 8239 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8240 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8241 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8242 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8243 } 8244 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8245 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8246 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8247 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8248 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8249 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8250 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8251 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8252 /* S_j */ 8253 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8254 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8255 8256 /* mimic vector in \widetilde{W}_\Gamma */ 8257 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8258 /* continuous in primal space */ 8259 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8260 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8261 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8262 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8263 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8264 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8265 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8266 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8267 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8268 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8269 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8270 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8271 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8272 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8273 8274 /* assemble rhs for coarse problem */ 8275 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8276 /* local with Schur */ 8277 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8278 if (zerodiag) { 8279 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8280 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8281 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8282 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8283 } 8284 /* sum on primal nodes the local contributions */ 8285 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8286 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8287 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8288 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8289 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8290 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8291 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8292 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8293 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8294 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8295 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8296 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8297 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8298 /* scale primal nodes (BDDC sums contibutions) */ 8299 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8300 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8301 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8302 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8303 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8304 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8305 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8306 /* global: \widetilde{B0}_B w_\Gamma */ 8307 if (zerodiag) { 8308 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8309 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8310 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8311 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8312 } 8313 /* BDDC */ 8314 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8315 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8316 8317 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8318 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8319 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8320 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8321 for (i=0;i<pcbddc->benign_n;i++) { 8322 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8323 } 8324 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8325 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8326 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8327 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8328 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8329 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8330 } 8331 PetscFunctionReturn(0); 8332 } 8333 8334 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8335 #undef __FUNCT__ 8336 #define __FUNCT__ "MatMPIAIJRestrict" 8337 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8338 { 8339 Mat At; 8340 IS rows; 8341 PetscInt rst,ren; 8342 PetscErrorCode ierr; 8343 PetscLayout rmap; 8344 8345 PetscFunctionBegin; 8346 rst = ren = 0; 8347 if (ccomm != MPI_COMM_NULL) { 8348 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8349 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8350 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8351 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8352 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8353 } 8354 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8355 ierr = MatGetSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8356 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8357 8358 if (ccomm != MPI_COMM_NULL) { 8359 Mat_MPIAIJ *a,*b; 8360 IS from,to; 8361 Vec gvec; 8362 PetscInt lsize; 8363 8364 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8365 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8366 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8367 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8368 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8369 a = (Mat_MPIAIJ*)At->data; 8370 b = (Mat_MPIAIJ*)(*B)->data; 8371 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8372 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8373 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8374 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8375 b->A = a->A; 8376 b->B = a->B; 8377 8378 b->donotstash = a->donotstash; 8379 b->roworiented = a->roworiented; 8380 b->rowindices = 0; 8381 b->rowvalues = 0; 8382 b->getrowactive = PETSC_FALSE; 8383 8384 (*B)->rmap = rmap; 8385 (*B)->factortype = A->factortype; 8386 (*B)->assembled = PETSC_TRUE; 8387 (*B)->insertmode = NOT_SET_VALUES; 8388 (*B)->preallocated = PETSC_TRUE; 8389 8390 if (a->colmap) { 8391 #if defined(PETSC_USE_CTABLE) 8392 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8393 #else 8394 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8395 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8396 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8397 #endif 8398 } else b->colmap = 0; 8399 if (a->garray) { 8400 PetscInt len; 8401 len = a->B->cmap->n; 8402 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8403 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8404 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8405 } else b->garray = 0; 8406 8407 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8408 b->lvec = a->lvec; 8409 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8410 8411 /* cannot use VecScatterCopy */ 8412 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8413 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8414 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8415 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8416 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8417 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8418 ierr = ISDestroy(&from);CHKERRQ(ierr); 8419 ierr = ISDestroy(&to);CHKERRQ(ierr); 8420 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8421 } 8422 ierr = MatDestroy(&At);CHKERRQ(ierr); 8423 PetscFunctionReturn(0); 8424 } 8425