1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 8 9 /* returns B s.t. range(B) _|_ range(A) */ 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatDense_OrthogonalComplement" 12 PetscErrorCode MatDense_OrthogonalComplement(Mat A, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 13 { 14 #if !defined(PETSC_USE_COMPLEX) 15 PetscScalar *uwork,*data,*U, ds = 0.; 16 PetscReal *sing; 17 PetscBLASInt bM,bN,lwork,lierr,di = 1; 18 PetscInt ulw,i,nr,nc,n; 19 PetscErrorCode ierr; 20 21 PetscFunctionBegin; 22 #if defined(PETSC_MISSING_LAPACK_GESVD) 23 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 24 #endif 25 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 26 if (!nr || !nc) PetscFunctionReturn(0); 27 28 /* workspace */ 29 if (!work) { 30 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 31 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 32 } else { 33 ulw = lw; 34 uwork = work; 35 } 36 n = PetscMin(nr,nc); 37 if (!rwork) { 38 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 39 } else { 40 sing = rwork; 41 } 42 43 /* SVD */ 44 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 46 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 47 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 48 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 49 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 50 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 51 ierr = PetscFPTrapPop();CHKERRQ(ierr); 52 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 53 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 54 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 55 if (!rwork) { 56 ierr = PetscFree(sing);CHKERRQ(ierr); 57 } 58 if (!work) { 59 ierr = PetscFree(uwork);CHKERRQ(ierr); 60 } 61 /* create B */ 62 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 63 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 64 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 65 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 66 ierr = PetscFree(U);CHKERRQ(ierr); 67 #else 68 PetscFunctionBegin; 69 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 70 #endif 71 PetscFunctionReturn(0); 72 } 73 74 /* TODO REMOVE */ 75 #if defined(PRINT_GDET) 76 static int inc = 0; 77 static int lev = 0; 78 #endif 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "PCBDDCComputeNedelecChangeEdge" 82 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 83 { 84 PetscErrorCode ierr; 85 Mat GE,GEd; 86 PetscInt rsize,csize,esize; 87 PetscScalar *ptr; 88 89 PetscFunctionBegin; 90 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 91 if (!esize) PetscFunctionReturn(0); 92 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 93 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 94 95 /* gradients */ 96 ptr = work + 5*esize; 97 ierr = MatGetSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 98 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 99 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 100 ierr = MatDestroy(&GE);CHKERRQ(ierr); 101 102 /* constants */ 103 ptr += rsize*csize; 104 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 105 ierr = MatGetSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 107 ierr = MatDestroy(&GE);CHKERRQ(ierr); 108 ierr = MatDense_OrthogonalComplement(GEd,5*esize,work,rwork,GKins);CHKERRQ(ierr); 109 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 110 111 if (corners) { 112 Mat GEc; 113 PetscScalar *vals,v; 114 115 ierr = MatGetSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 116 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 117 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 118 v = PetscAbsScalar(vals[0]); 119 cvals[0] = vals[0]/v; 120 cvals[1] = vals[1]/v; 121 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 122 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 123 #if defined(PRINT_GDET) 124 { 125 PetscViewer viewer; 126 char filename[256]; 127 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 128 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 129 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 130 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 131 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 132 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 133 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 134 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 135 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 136 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 137 } 138 #endif 139 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 140 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 141 } 142 143 PetscFunctionReturn(0); 144 } 145 146 #undef __FUNCT__ 147 #define __FUNCT__ "PCBDDCNedelecSupport" 148 PetscErrorCode PCBDDCNedelecSupport(PC pc) 149 { 150 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 151 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 152 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 153 Vec tvec; 154 PetscSF sfv; 155 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 156 MPI_Comm comm; 157 IS lned,primals,allprimals,nedfieldlocal; 158 IS *eedges,*extrows,*extcols,*alleedges; 159 PetscBT btv,bte,btvc,btb,btvcand,btvi,btee,bter; 160 PetscScalar *vals,*work; 161 PetscReal *rwork; 162 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 163 PetscInt ne,nv,Lv,order,n,field; 164 PetscInt n_neigh,*neigh,*n_shared,**shared; 165 PetscInt i,j,extmem,cum,maxsize,nee; 166 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 167 PetscInt *sfvleaves,*sfvroots; 168 PetscInt *corners,*cedges; 169 #if defined(PETSC_USE_DEBUG) 170 PetscInt *emarks; 171 #endif 172 PetscBool print,eerr,done,lrc[2],conforming,global; 173 PetscErrorCode ierr; 174 175 PetscFunctionBegin; 176 /* test variable order code and print debug info TODO: to be removed */ 177 print = PETSC_FALSE; 178 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_print_nedelec",&print,NULL);CHKERRQ(ierr); 179 ierr = PetscOptionsGetInt(NULL,NULL,"-pc_bddc_nedelec_order",&pcbddc->nedorder,NULL);CHKERRQ(ierr); 180 181 /* Return to caller if there are no edges in the decomposition */ 182 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 183 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 184 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 185 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 186 lrc[0] = PETSC_FALSE; 187 for (i=0;i<n;i++) { 188 if (PetscRealPart(vals[i]) > 2.) { 189 lrc[0] = PETSC_TRUE; 190 break; 191 } 192 } 193 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 194 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 195 if (!lrc[1]) PetscFunctionReturn(0); 196 197 /* If the discrete gradient is defined for a subset of dofs and global is true, 198 it assumes G is given in global ordering for all the dofs. 199 Otherwise, the ordering is global for the Nedelec field */ 200 order = pcbddc->nedorder; 201 conforming = pcbddc->conforming; 202 field = pcbddc->nedfield; 203 global = pcbddc->nedglobal; 204 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 205 if (pcbddc->n_ISForDofsLocal && field > -1) { 206 PetscBool setprimal = PETSC_FALSE; 207 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_nedelec_field_primal",&setprimal,NULL);CHKERRQ(ierr); 208 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 209 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 210 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 211 if (setprimal) { 212 IS enedfieldlocal; 213 PetscInt *eidxs; 214 215 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 216 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 217 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 218 for (i=0,cum=0;i<ne;i++) { 219 if (PetscRealPart(vals[idxs[i]]) > 2.) { 220 eidxs[cum++] = idxs[i]; 221 } 222 } 223 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 224 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 225 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 226 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 227 ierr = PetscFree(eidxs);CHKERRQ(ierr); 228 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 229 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 230 PetscFunctionReturn(0); 231 } 232 } else if (!pcbddc->n_ISForDofsLocal) { 233 PetscBool testnedfield = PETSC_FALSE; 234 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_nedelec_field",&testnedfield,NULL);CHKERRQ(ierr); 235 if (!testnedfield) { 236 ne = n; 237 nedfieldlocal = NULL; 238 } else { 239 /* ierr = ISCreateStride(comm,n,0,1,&nedfieldlocal);CHKERRQ(ierr); */ 240 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 241 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 242 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 243 for (i=0;i<n;i++) matis->sf_leafdata[i] = 1; 244 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 245 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 246 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 247 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 248 for (i=0,cum=0;i<n;i++) { 249 if (matis->sf_leafdata[i] > 1) { 250 matis->sf_leafdata[cum++] = i; 251 } 252 } 253 ierr = ISCreateGeneral(comm,cum,matis->sf_leafdata,PETSC_COPY_VALUES,&nedfieldlocal);CHKERRQ(ierr); 254 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 255 } 256 global = PETSC_TRUE; 257 } else { 258 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 259 } 260 261 if (nedfieldlocal) { /* merge with previous code when testing is done */ 262 IS is; 263 264 /* need to map from the local Nedelec field to local numbering */ 265 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 266 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 267 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 268 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 269 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 270 if (global) { 271 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 272 el2g = al2g; 273 } else { 274 IS gis; 275 276 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 277 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 278 ierr = ISDestroy(&gis);CHKERRQ(ierr); 279 } 280 ierr = ISDestroy(&is);CHKERRQ(ierr); 281 } else { 282 /* restore default */ 283 pcbddc->nedfield = -1; 284 /* one ref for the destruction of al2g, one for el2g */ 285 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 286 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 287 el2g = al2g; 288 fl2g = NULL; 289 } 290 291 /* Sanity checks */ 292 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 293 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 294 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 295 296 /* Drop connections for interior edges */ 297 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 298 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 299 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 300 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 301 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 302 if (nedfieldlocal) { 303 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 304 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 305 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 306 } else { 307 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 308 } 309 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 310 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 311 if (global) { 312 PetscInt rst; 313 314 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 315 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 316 if (matis->sf_rootdata[i] < 2) { 317 matis->sf_rootdata[cum++] = i + rst; 318 } 319 } 320 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 321 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 322 } else { 323 PetscInt *tbz; 324 325 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 326 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 327 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 328 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 329 for (i=0,cum=0;i<ne;i++) 330 if (matis->sf_leafdata[idxs[i]] == 1) 331 tbz[cum++] = i; 332 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 333 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 334 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 335 ierr = PetscFree(tbz);CHKERRQ(ierr); 336 } 337 338 /* Extract subdomain relevant rows of G */ 339 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 340 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 341 ierr = MatGetSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 342 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 343 ierr = ISDestroy(&lned);CHKERRQ(ierr); 344 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 345 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 346 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 347 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 348 if (print) { 349 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 350 ierr = MatView(lG,NULL);CHKERRQ(ierr); 351 } 352 353 /* SF for nodal communications */ 354 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 355 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 356 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 357 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 358 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 359 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 360 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 361 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 362 ierr = PetscMalloc2(nv,&sfvleaves,Lv,&sfvroots);CHKERRQ(ierr); 363 364 /* Destroy temporary G created in MATIS format and modified G */ 365 ierr = MatDestroy(&G);CHKERRQ(ierr); 366 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 367 368 /* Save lG */ 369 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 370 371 /* Analyze the edge-nodes connections (duplicate lG) */ 372 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 373 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 374 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 375 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 376 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 377 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 378 /* need to import the boundary specification to ensure the 379 proper detection of coarse edges' endpoints */ 380 if (pcbddc->DirichletBoundariesLocal) { 381 IS is; 382 383 if (fl2g) { 384 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 385 } else { 386 is = pcbddc->DirichletBoundariesLocal; 387 } 388 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 389 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 390 for (i=0;i<cum;i++) { 391 if (idxs[i] >= 0) { 392 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 393 } 394 } 395 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 396 if (fl2g) { 397 ierr = ISDestroy(&is);CHKERRQ(ierr); 398 } 399 } 400 if (pcbddc->NeumannBoundariesLocal) { 401 IS is; 402 403 if (fl2g) { 404 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 405 } else { 406 is = pcbddc->NeumannBoundariesLocal; 407 } 408 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 409 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 410 for (i=0;i<cum;i++) { 411 if (idxs[i] >= 0) { 412 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 413 } 414 } 415 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 416 if (fl2g) { 417 ierr = ISDestroy(&is);CHKERRQ(ierr); 418 } 419 } 420 421 /* need to remove coarse faces' dofs to ensure the 422 proper detection of coarse edges' endpoints */ 423 ierr = PetscCalloc1(ne,&marks);CHKERRQ(ierr); 424 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 425 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 426 for (i=1;i<n_neigh;i++) 427 for (j=0;j<n_shared[i];j++) 428 marks[shared[i][j]]++; 429 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 430 for (i=0;i<ne;i++) { 431 if (marks[i] > 1 || (marks[i] == 1 && PetscBTLookup(btb,i))) { 432 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 433 } 434 } 435 436 if (!conforming) { 437 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 438 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 439 } 440 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 441 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 442 cum = 0; 443 for (i=0;i<ne;i++) { 444 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 445 if (!PetscBTLookup(btee,i)) { 446 marks[cum++] = i; 447 continue; 448 } 449 /* set badly connected edge dofs as primal */ 450 if (!conforming) { 451 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 452 marks[cum++] = i; 453 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 454 for (j=ii[i];j<ii[i+1];j++) { 455 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 456 } 457 } else { 458 /* every edge dofs should be connected trough a certain number of nodal dofs 459 to other edge dofs belonging to coarse edges 460 - at most 2 endpoints 461 - order-1 interior nodal dofs 462 - no undefined nodal dofs (nconn < order) 463 */ 464 PetscInt ends = 0,ints = 0, undef = 0; 465 for (j=ii[i];j<ii[i+1];j++) { 466 PetscInt v = jj[j],k; 467 PetscInt nconn = iit[v+1]-iit[v]; 468 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 469 if (nconn > order) ends++; 470 else if (nconn == order) ints++; 471 else undef++; 472 } 473 if (undef || ends > 2 || ints != order -1) { 474 marks[cum++] = i; 475 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 476 for (j=ii[i];j<ii[i+1];j++) { 477 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 478 } 479 } 480 } 481 } 482 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 483 if (!order && ii[i+1] != ii[i]) { 484 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 485 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 486 } 487 } 488 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 489 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 490 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 491 if (!conforming) { 492 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 493 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 494 } 495 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 496 /* identify splitpoints and corner candidates */ 497 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 498 if (print) { 499 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 500 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 501 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 502 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 503 } 504 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 505 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 506 for (i=0;i<nv;i++) { 507 PetscInt ord = order, test = ii[i+1]-ii[i]; 508 if (!order) { /* variable order */ 509 PetscReal vorder = 0.; 510 511 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 512 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 513 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 514 ord = 1; 515 } 516 #if defined(PETSC_USE_DEBUG) 517 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 518 #endif 519 if (test >= 3*ord) { /* splitpoints */ 520 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d\n",i); 521 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 522 } else if (test == ord) { 523 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 524 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 525 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 526 } else { 527 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 528 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 529 } 530 } 531 } 532 533 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 534 if (order != 1) { 535 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 536 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 537 for (i=0;i<nv;i++) { 538 if (PetscBTLookup(btvcand,i)) { 539 PetscBool found = PETSC_FALSE; 540 for (j=ii[i];j<ii[i+1] && !found;j++) { 541 PetscInt k,e = jj[j]; 542 if (PetscBTLookup(bte,e)) continue; 543 for (k=iit[e];k<iit[e+1];k++) { 544 PetscInt v = jjt[k]; 545 if (v != i && PetscBTLookup(btvcand,v)) { 546 found = PETSC_TRUE; 547 break; 548 } 549 } 550 } 551 if (!found) { 552 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 553 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 554 } else { 555 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 556 } 557 } 558 } 559 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 560 } 561 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 562 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 563 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 564 565 /* Get the local G^T explicitly */ 566 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 567 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 568 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 569 570 /* Mark interior nodal dofs */ 571 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 572 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 573 for (i=1;i<n_neigh;i++) { 574 for (j=0;j<n_shared[i];j++) { 575 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 576 } 577 } 578 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 579 580 /* communicate corners and splitpoints */ 581 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 582 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 583 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 584 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 585 586 if (print) { 587 IS tbz; 588 589 cum = 0; 590 for (i=0;i<nv;i++) 591 if (sfvleaves[i]) 592 vmarks[cum++] = i; 593 594 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 595 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 596 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 597 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 598 } 599 600 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 601 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 602 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 603 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 604 605 /* Zero rows of lGt corresponding to identified corners 606 and interior nodal dofs */ 607 cum = 0; 608 for (i=0;i<nv;i++) { 609 if (sfvleaves[i]) { 610 vmarks[cum++] = i; 611 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 612 } 613 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 614 } 615 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 616 if (print) { 617 IS tbz; 618 619 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 620 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 621 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 622 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 623 } 624 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 625 ierr = PetscFree(vmarks);CHKERRQ(ierr); 626 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 627 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 628 629 /* Recompute G */ 630 ierr = MatDestroy(&lG);CHKERRQ(ierr); 631 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 632 if (print) { 633 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 634 ierr = MatView(lG,NULL);CHKERRQ(ierr); 635 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 636 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 637 } 638 639 /* Get primal dofs (if any) */ 640 cum = 0; 641 for (i=0;i<ne;i++) { 642 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 643 } 644 if (fl2g) { 645 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 646 } 647 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 648 if (print) { 649 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 650 ierr = ISView(primals,NULL);CHKERRQ(ierr); 651 } 652 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 653 /* TODO: what if the user passed in some of them ? */ 654 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 655 ierr = ISDestroy(&primals);CHKERRQ(ierr); 656 657 /* Compute edge connectivity */ 658 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 659 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 660 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 661 if (fl2g) { 662 PetscBT btf; 663 PetscInt *iia,*jja,*iiu,*jju; 664 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 665 666 /* create CSR for all local dofs */ 667 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 668 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 669 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 670 iiu = pcbddc->mat_graph->xadj; 671 jju = pcbddc->mat_graph->adjncy; 672 } else if (pcbddc->use_local_adj) { 673 rest = PETSC_TRUE; 674 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 675 } else { 676 free = PETSC_TRUE; 677 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 678 iiu[0] = 0; 679 for (i=0;i<n;i++) { 680 iiu[i+1] = i+1; 681 jju[i] = -1; 682 } 683 } 684 685 /* import sizes of CSR */ 686 iia[0] = 0; 687 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 688 689 /* overwrite entries corresponding to the Nedelec field */ 690 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 691 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 692 for (i=0;i<ne;i++) { 693 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 694 iia[idxs[i]+1] = ii[i+1]-ii[i]; 695 } 696 697 /* iia in CSR */ 698 for (i=0;i<n;i++) iia[i+1] += iia[i]; 699 700 /* jja in CSR */ 701 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 702 for (i=0;i<n;i++) 703 if (!PetscBTLookup(btf,i)) 704 for (j=0;j<iiu[i+1]-iiu[i];j++) 705 jja[iia[i]+j] = jju[iiu[i]+j]; 706 707 /* map edge dofs connectivity */ 708 if (jj) { 709 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 710 for (i=0;i<ne;i++) { 711 PetscInt e = idxs[i]; 712 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 713 } 714 } 715 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 716 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 717 if (rest) { 718 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 719 } 720 if (free) { 721 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 722 } 723 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 724 } else { 725 if (jj) { 726 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 727 } 728 } 729 730 /* Analyze interface for edge dofs */ 731 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 732 733 /* Get coarse edges in the edge space */ 734 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 735 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 736 737 if (fl2g) { 738 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 739 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 740 for (i=0;i<nee;i++) { 741 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 742 } 743 } else { 744 eedges = alleedges; 745 primals = allprimals; 746 } 747 748 /* Mark fine edge dofs with their coarse edge id */ 749 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 750 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 751 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 752 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 753 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 754 if (print) { 755 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 756 ierr = ISView(primals,NULL);CHKERRQ(ierr); 757 } 758 759 maxsize = 0; 760 for (i=0;i<nee;i++) { 761 PetscInt size,mark = i+1; 762 763 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 764 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 765 for (j=0;j<size;j++) marks[idxs[j]] = mark; 766 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 767 maxsize = PetscMax(maxsize,size); 768 } 769 770 /* Find coarse edge endpoints */ 771 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 772 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 773 for (i=0;i<nee;i++) { 774 PetscInt mark = i+1,size; 775 776 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 777 if (!size && nedfieldlocal) continue; 778 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 779 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 780 if (print) { 781 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 782 ISView(eedges[i],NULL); 783 } 784 for (j=0;j<size;j++) { 785 PetscInt k, ee = idxs[j]; 786 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 787 for (k=ii[ee];k<ii[ee+1];k++) { 788 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 789 if (PetscBTLookup(btv,jj[k])) { 790 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 791 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 792 PetscInt k2; 793 PetscBool corner = PETSC_FALSE; 794 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 795 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 796 /* it's a corner if either is connected with an edge dof belonging to a different cc or 797 if the edge dof lie on the natural part of the boundary */ 798 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 799 corner = PETSC_TRUE; 800 break; 801 } 802 } 803 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 804 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 805 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 806 } else { 807 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 808 } 809 } 810 } 811 } 812 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 813 } 814 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 815 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 816 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 817 818 /* Reset marked primal dofs */ 819 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 820 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 821 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 822 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 823 824 /* Now use the initial lG */ 825 ierr = MatDestroy(&lG);CHKERRQ(ierr); 826 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 827 lG = lGinit; 828 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 829 830 /* Compute extended cols indices */ 831 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 832 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 833 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 834 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 835 i *= maxsize; 836 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 837 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 838 eerr = PETSC_FALSE; 839 for (i=0;i<nee;i++) { 840 PetscInt size,found = 0; 841 842 cum = 0; 843 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 844 if (!size && nedfieldlocal) continue; 845 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 846 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 847 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 848 for (j=0;j<size;j++) { 849 PetscInt k,ee = idxs[j]; 850 for (k=ii[ee];k<ii[ee+1];k++) { 851 PetscInt vv = jj[k]; 852 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 853 else if (!PetscBTLookupSet(btvc,vv)) found++; 854 } 855 } 856 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 857 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 858 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 859 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 860 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 861 /* it may happen that endpoints are not defined at this point 862 if it is the case, mark this edge for a second pass */ 863 if (cum != size -1 || found != 2) { 864 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 865 if (print) { 866 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 867 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 868 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 869 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 870 } 871 eerr = PETSC_TRUE; 872 } 873 } 874 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 875 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 876 if (done) { 877 PetscInt *newprimals; 878 879 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 880 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 881 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 882 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 883 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 884 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 885 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 886 for (i=0;i<nee;i++) { 887 PetscBool has_candidates = PETSC_FALSE; 888 if (PetscBTLookup(bter,i)) { 889 PetscInt size,mark = i+1; 890 891 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 892 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 893 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 894 for (j=0;j<size;j++) { 895 PetscInt k,ee = idxs[j]; 896 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 897 for (k=ii[ee];k<ii[ee+1];k++) { 898 /* set all candidates located on the edge as corners */ 899 if (PetscBTLookup(btvcand,jj[k])) { 900 PetscInt k2,vv = jj[k]; 901 has_candidates = PETSC_TRUE; 902 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 903 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 904 /* set all edge dofs connected to candidate as primals */ 905 for (k2=iit[vv];k2<iit[vv+1];k2++) { 906 if (marks[jjt[k2]] == mark) { 907 PetscInt k3,ee2 = jjt[k2]; 908 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 909 newprimals[cum++] = ee2; 910 /* finally set the new corners */ 911 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 912 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 913 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 914 } 915 } 916 } 917 } else { 918 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 919 } 920 } 921 } 922 if (!has_candidates) { /* circular edge */ 923 PetscInt k, ee = idxs[0],*tmarks; 924 925 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 926 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 927 for (k=ii[ee];k<ii[ee+1];k++) { 928 PetscInt k2; 929 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 930 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 931 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 932 } 933 for (j=0;j<size;j++) { 934 if (tmarks[idxs[j]] > 1) { 935 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 936 newprimals[cum++] = idxs[j]; 937 } 938 } 939 ierr = PetscFree(tmarks);CHKERRQ(ierr); 940 } 941 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 942 } 943 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 944 } 945 ierr = PetscFree(extcols);CHKERRQ(ierr); 946 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 947 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 948 if (fl2g) { 949 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 950 ierr = ISDestroy(&primals);CHKERRQ(ierr); 951 for (i=0;i<nee;i++) { 952 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 953 } 954 ierr = PetscFree(eedges);CHKERRQ(ierr); 955 } 956 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 957 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 958 ierr = PetscFree(newprimals);CHKERRQ(ierr); 959 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 960 ierr = ISDestroy(&primals);CHKERRQ(ierr); 961 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 962 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 963 if (fl2g) { 964 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 965 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 966 for (i=0;i<nee;i++) { 967 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 968 } 969 } else { 970 eedges = alleedges; 971 primals = allprimals; 972 } 973 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 974 975 /* Mark again */ 976 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 977 for (i=0;i<nee;i++) { 978 PetscInt size,mark = i+1; 979 980 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 981 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 982 for (j=0;j<size;j++) marks[idxs[j]] = mark; 983 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 984 } 985 if (print) { 986 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 987 ierr = ISView(primals,NULL);CHKERRQ(ierr); 988 } 989 990 /* Recompute extended cols */ 991 eerr = PETSC_FALSE; 992 for (i=0;i<nee;i++) { 993 PetscInt size; 994 995 cum = 0; 996 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 997 if (!size && nedfieldlocal) continue; 998 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 999 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1000 for (j=0;j<size;j++) { 1001 PetscInt k,ee = idxs[j]; 1002 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1003 } 1004 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1005 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1006 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1007 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1008 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1009 if (cum != size -1) { 1010 if (print) { 1011 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1012 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1013 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1014 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1015 } 1016 eerr = PETSC_TRUE; 1017 } 1018 } 1019 } 1020 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1021 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1022 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1023 /* an error should not occur at this point */ 1024 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1025 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1026 1027 /* Check the number of endpoints */ 1028 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1029 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1030 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1031 for (i=0;i<nee;i++) { 1032 PetscInt size, found = 0, gc[2]; 1033 1034 /* init with defaults */ 1035 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1036 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1037 if (!size && nedfieldlocal) continue; 1038 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1039 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1040 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1041 for (j=0;j<size;j++) { 1042 PetscInt k,ee = idxs[j]; 1043 for (k=ii[ee];k<ii[ee+1];k++) { 1044 PetscInt vv = jj[k]; 1045 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1046 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1047 corners[i*2+found++] = vv; 1048 } 1049 } 1050 } 1051 if (found != 2) { 1052 PetscInt e; 1053 if (fl2g) { 1054 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1055 } else { 1056 e = idxs[0]; 1057 } 1058 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1059 } 1060 1061 /* get primal dof index on this coarse edge */ 1062 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1063 if (gc[0] > gc[1]) { 1064 PetscInt swap = corners[2*i]; 1065 corners[2*i] = corners[2*i+1]; 1066 corners[2*i+1] = swap; 1067 } 1068 cedges[i] = idxs[size-1]; 1069 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1070 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1071 } 1072 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1073 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1074 1075 #if defined(PETSC_USE_DEBUG) 1076 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1077 not interfere with neighbouring coarse edges */ 1078 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1079 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1080 for (i=0;i<nv;i++) { 1081 PetscInt emax = 0,eemax = 0; 1082 1083 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1084 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1085 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1086 for (j=1;j<nee+1;j++) { 1087 if (emax < emarks[j]) { 1088 emax = emarks[j]; 1089 eemax = j; 1090 } 1091 } 1092 /* not relevant for edges */ 1093 if (!eemax) continue; 1094 1095 for (j=ii[i];j<ii[i+1];j++) { 1096 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1097 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1098 } 1099 } 1100 } 1101 ierr = PetscFree(emarks);CHKERRQ(ierr); 1102 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1103 #endif 1104 1105 /* Compute extended rows indices for edge blocks of the change of basis */ 1106 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1107 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1108 extmem *= maxsize; 1109 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1110 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1111 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1112 for (i=0;i<nv;i++) { 1113 PetscInt mark = 0,size,start; 1114 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1115 for (j=ii[i];j<ii[i+1];j++) 1116 if (marks[jj[j]] && !mark) 1117 mark = marks[jj[j]]; 1118 1119 /* not relevant */ 1120 if (!mark) continue; 1121 1122 /* import extended row */ 1123 mark--; 1124 start = mark*extmem+extrowcum[mark]; 1125 size = ii[i+1]-ii[i]; 1126 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1127 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1128 extrowcum[mark] += size; 1129 } 1130 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1131 cum = 0; 1132 for (i=0;i<nee;i++) { 1133 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1134 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1135 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1136 cum = PetscMax(cum,size); 1137 } 1138 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1139 ierr = PetscFree(marks);CHKERRQ(ierr); 1140 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1141 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1142 1143 /* Workspace for lapack inner calls and VecSetValues */ 1144 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1145 1146 /* Create change of basis matrix (preallocation can be improved) */ 1147 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1148 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1149 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1150 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1151 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1152 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1153 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1154 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1155 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1156 ierr = MatSetOption(T,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 1157 1158 /* Defaults to identity */ 1159 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1160 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1161 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1162 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1163 1164 /* Create discrete gradient for the coarser level if needed */ 1165 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1166 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1167 if (pcbddc->current_level < pcbddc->max_levels) { 1168 ISLocalToGlobalMapping cel2g,cvl2g; 1169 IS wis,gwis; 1170 PetscInt cnv,cne; 1171 1172 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1173 if (fl2g) { 1174 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1175 } else { 1176 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1177 pcbddc->nedclocal = wis; 1178 } 1179 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1180 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1181 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1182 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1183 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1184 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1185 1186 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1187 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1188 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1189 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1190 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1191 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1192 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1193 1194 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1195 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1196 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1197 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1198 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1199 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1200 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1201 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1202 } 1203 1204 #if defined(PRINT_GDET) 1205 inc = 0; 1206 lev = pcbddc->current_level; 1207 #endif 1208 for (i=0;i<nee;i++) { 1209 Mat Gins = NULL, GKins = NULL; 1210 IS cornersis = NULL; 1211 PetscScalar cvals[2]; 1212 1213 if (pcbddc->nedcG) { 1214 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1215 } 1216 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1217 if (Gins && GKins) { 1218 PetscScalar *data; 1219 const PetscInt *rows,*cols; 1220 PetscInt nrh,nch,nrc,ncc; 1221 1222 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1223 /* H1 */ 1224 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1225 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1226 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1227 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1228 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1229 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1230 /* complement */ 1231 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1232 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1233 if (ncc + nch != nrc) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d",ncc,nch,nrc); 1234 if (ncc != 1 && pcbddc->nedcG) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the dicrete gradient for the next level with ncc %d",ncc); 1235 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1236 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1237 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1238 1239 /* coarse discrete gradient */ 1240 if (pcbddc->nedcG) { 1241 PetscInt cols[2]; 1242 1243 cols[0] = 2*i; 1244 cols[1] = 2*i+1; 1245 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1246 } 1247 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1248 } 1249 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1250 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1251 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1252 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1253 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1254 } 1255 1256 /* Start assembling */ 1257 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1258 if (pcbddc->nedcG) { 1259 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1260 } 1261 1262 /* Free */ 1263 if (fl2g) { 1264 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1265 for (i=0;i<nee;i++) { 1266 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1267 } 1268 ierr = PetscFree(eedges);CHKERRQ(ierr); 1269 } 1270 1271 /* hack mat_graph with primal dofs on the coarse edges */ 1272 { 1273 PCBDDCGraph graph = pcbddc->mat_graph; 1274 PetscInt *oqueue = graph->queue; 1275 PetscInt *ocptr = graph->cptr; 1276 PetscInt ncc,*idxs; 1277 1278 /* find first primal edge */ 1279 if (pcbddc->nedclocal) { 1280 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1281 } else { 1282 if (fl2g) { 1283 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1284 } 1285 idxs = cedges; 1286 } 1287 cum = 0; 1288 while (cum < nee && cedges[cum] < 0) cum++; 1289 1290 /* adapt connected components */ 1291 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1292 graph->cptr[0] = 0; 1293 for (i=0,ncc=0;i<graph->ncc;i++) { 1294 PetscInt lc = ocptr[i+1]-ocptr[i]; 1295 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1296 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1297 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1298 ncc++; 1299 lc--; 1300 cum++; 1301 while (cum < nee && cedges[cum] < 0) cum++; 1302 } 1303 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1304 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1305 ncc++; 1306 } 1307 graph->ncc = ncc; 1308 if (pcbddc->nedclocal) { 1309 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1310 } 1311 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1312 } 1313 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1314 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1315 1316 1317 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1318 ierr = PetscFree(extrow);CHKERRQ(ierr); 1319 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1320 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1321 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1322 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1323 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1324 ierr = PetscFree(corners);CHKERRQ(ierr); 1325 ierr = PetscFree(cedges);CHKERRQ(ierr); 1326 ierr = PetscFree(extrows);CHKERRQ(ierr); 1327 ierr = PetscFree(extcols);CHKERRQ(ierr); 1328 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1329 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1330 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1331 1332 /* Complete assembling */ 1333 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1334 if (pcbddc->nedcG) { 1335 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1336 #if 0 1337 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1338 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1339 #endif 1340 } 1341 1342 /* set change of basis */ 1343 ierr = PCBDDCSetChangeOfBasisMat(pc,T,PETSC_FALSE);CHKERRQ(ierr); 1344 #if 0 1345 if (pcbddc->current_level) { 1346 PetscViewer viewer; 1347 char filename[256]; 1348 Mat Tned; 1349 IS sub; 1350 PetscInt rst; 1351 1352 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 1353 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 1354 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 1355 if (nedfieldlocal) { 1356 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 1357 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 1358 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 1359 } else { 1360 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 1361 } 1362 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 1363 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 1364 ierr = MatGetOwnershipRange(pc->pmat,&rst,NULL);CHKERRQ(ierr); 1365 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 1366 if (matis->sf_rootdata[i]) { 1367 matis->sf_rootdata[cum++] = i + rst; 1368 } 1369 } 1370 PetscPrintf(PETSC_COMM_SELF,"[%D] LEVEL %d MY ne %d cum %d\n",PetscGlobalRank,pcbddc->current_level,ne,cum); 1371 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cum,matis->sf_rootdata,PETSC_USE_POINTER,&sub);CHKERRQ(ierr); 1372 ierr = MatGetSubMatrix(T,sub,sub,MAT_INITIAL_MATRIX,&Tned);CHKERRQ(ierr); 1373 ierr = ISDestroy(&sub);CHKERRQ(ierr); 1374 1375 sprintf(filename,"Change_l%d.m",pcbddc->current_level); 1376 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)Tned),filename,&viewer);CHKERRQ(ierr); 1377 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1378 ierr = PetscObjectSetName((PetscObject)Tned,"T");CHKERRQ(ierr); 1379 ierr = MatView(Tned,viewer);CHKERRQ(ierr); 1380 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1381 ierr = MatDestroy(&Tned);CHKERRQ(ierr); 1382 } 1383 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1384 #endif 1385 ierr = MatDestroy(&T);CHKERRQ(ierr); 1386 1387 PetscFunctionReturn(0); 1388 } 1389 1390 /* the near-null space of BDDC carries information on quadrature weights, 1391 and these can be collinear -> so cheat with MatNullSpaceCreate 1392 and create a suitable set of basis vectors first */ 1393 #undef __FUNCT__ 1394 #define __FUNCT__ "PCBDDCNullSpaceCreate" 1395 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1396 { 1397 PetscErrorCode ierr; 1398 PetscInt i; 1399 1400 PetscFunctionBegin; 1401 for (i=0;i<nvecs;i++) { 1402 PetscInt first,last; 1403 1404 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1405 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1406 if (i>=first && i < last) { 1407 PetscScalar *data; 1408 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1409 if (!has_const) { 1410 data[i-first] = 1.; 1411 } else { 1412 data[2*i-first] = 1./PetscSqrtReal(2.); 1413 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1414 } 1415 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1416 } 1417 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1418 } 1419 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1420 for (i=0;i<nvecs;i++) { /* reset vectors */ 1421 PetscInt first,last; 1422 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1423 if (i>=first && i < last) { 1424 PetscScalar *data; 1425 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1426 if (!has_const) { 1427 data[i-first] = 0.; 1428 } else { 1429 data[2*i-first] = 0.; 1430 data[2*i-first+1] = 0.; 1431 } 1432 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1433 } 1434 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1435 } 1436 PetscFunctionReturn(0); 1437 } 1438 1439 #undef __FUNCT__ 1440 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 1441 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1442 { 1443 Mat loc_divudotp; 1444 Vec p,v,vins,quad_vec,*quad_vecs; 1445 ISLocalToGlobalMapping map; 1446 IS *faces,*edges; 1447 PetscScalar *vals; 1448 const PetscScalar *array; 1449 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1450 PetscMPIInt rank; 1451 PetscErrorCode ierr; 1452 1453 PetscFunctionBegin; 1454 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1455 if (graph->twodim) { 1456 lmaxneighs = 2; 1457 } else { 1458 lmaxneighs = 1; 1459 for (i=0;i<ne;i++) { 1460 const PetscInt *idxs; 1461 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1462 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1463 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1464 } 1465 lmaxneighs++; /* graph count does not include self */ 1466 } 1467 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1468 maxsize = 0; 1469 for (i=0;i<ne;i++) { 1470 PetscInt nn; 1471 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1472 maxsize = PetscMax(maxsize,nn); 1473 } 1474 for (i=0;i<nf;i++) { 1475 PetscInt nn; 1476 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1477 maxsize = PetscMax(maxsize,nn); 1478 } 1479 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1480 /* create vectors to hold quadrature weights */ 1481 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1482 if (!transpose) { 1483 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1484 } else { 1485 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1486 } 1487 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1488 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1489 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1490 for (i=0;i<maxneighs;i++) { 1491 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1492 } 1493 1494 /* compute local quad vec */ 1495 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1496 if (!transpose) { 1497 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1498 } else { 1499 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1500 } 1501 ierr = VecSet(p,1.);CHKERRQ(ierr); 1502 if (!transpose) { 1503 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1504 } else { 1505 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1506 } 1507 if (vl2l) { 1508 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1509 } else { 1510 vins = v; 1511 } 1512 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1513 ierr = VecDestroy(&p);CHKERRQ(ierr); 1514 1515 /* insert in global quadrature vecs */ 1516 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1517 for (i=0;i<nf;i++) { 1518 const PetscInt *idxs; 1519 PetscInt idx,nn,j; 1520 1521 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1522 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1523 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1524 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1525 idx = -(idx+1); 1526 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1527 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1528 } 1529 for (i=0;i<ne;i++) { 1530 const PetscInt *idxs; 1531 PetscInt idx,nn,j; 1532 1533 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1534 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1535 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1536 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1537 idx = -(idx+1); 1538 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1539 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1540 } 1541 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1542 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1543 if (vl2l) { 1544 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1545 } 1546 ierr = VecDestroy(&v);CHKERRQ(ierr); 1547 ierr = PetscFree(vals);CHKERRQ(ierr); 1548 1549 /* assemble near null space */ 1550 for (i=0;i<maxneighs;i++) { 1551 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1552 } 1553 for (i=0;i<maxneighs;i++) { 1554 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1555 } 1556 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1557 PetscFunctionReturn(0); 1558 } 1559 1560 1561 #undef __FUNCT__ 1562 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 1563 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1564 { 1565 PetscErrorCode ierr; 1566 Vec local,global; 1567 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1568 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1569 1570 PetscFunctionBegin; 1571 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1572 /* need to convert from global to local topology information and remove references to information in global ordering */ 1573 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1574 if (pcbddc->user_provided_isfordofs) { 1575 if (pcbddc->n_ISForDofs) { 1576 PetscInt i; 1577 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1578 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1579 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1580 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1581 } 1582 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1583 pcbddc->n_ISForDofs = 0; 1584 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1585 } 1586 } else { 1587 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1588 PetscInt i, n = matis->A->rmap->n; 1589 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1590 if (i > 1) { 1591 pcbddc->n_ISForDofsLocal = i; 1592 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1593 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1594 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1595 } 1596 } 1597 } 1598 } 1599 1600 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1601 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1602 } 1603 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1604 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1605 } 1606 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1607 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1608 } 1609 ierr = VecDestroy(&global);CHKERRQ(ierr); 1610 ierr = VecDestroy(&local);CHKERRQ(ierr); 1611 PetscFunctionReturn(0); 1612 } 1613 1614 #undef __FUNCT__ 1615 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 1616 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1617 { 1618 PC_IS *pcis = (PC_IS*)(pc->data); 1619 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1620 PetscErrorCode ierr; 1621 1622 PetscFunctionBegin; 1623 if (!pcbddc->benign_have_null) { 1624 PetscFunctionReturn(0); 1625 } 1626 if (pcbddc->ChangeOfBasisMatrix) { 1627 Vec swap; 1628 1629 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1630 swap = pcbddc->work_change; 1631 pcbddc->work_change = r; 1632 r = swap; 1633 } 1634 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1635 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1636 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1637 ierr = VecSet(z,0.);CHKERRQ(ierr); 1638 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1639 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1640 if (pcbddc->ChangeOfBasisMatrix) { 1641 pcbddc->work_change = r; 1642 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1643 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1644 } 1645 PetscFunctionReturn(0); 1646 } 1647 1648 #undef __FUNCT__ 1649 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 1650 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1651 { 1652 PCBDDCBenignMatMult_ctx ctx; 1653 PetscErrorCode ierr; 1654 PetscBool apply_right,apply_left,reset_x; 1655 1656 PetscFunctionBegin; 1657 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1658 if (transpose) { 1659 apply_right = ctx->apply_left; 1660 apply_left = ctx->apply_right; 1661 } else { 1662 apply_right = ctx->apply_right; 1663 apply_left = ctx->apply_left; 1664 } 1665 reset_x = PETSC_FALSE; 1666 if (apply_right) { 1667 const PetscScalar *ax; 1668 PetscInt nl,i; 1669 1670 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1671 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1672 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1673 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1674 for (i=0;i<ctx->benign_n;i++) { 1675 PetscScalar sum,val; 1676 const PetscInt *idxs; 1677 PetscInt nz,j; 1678 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1679 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1680 sum = 0.; 1681 if (ctx->apply_p0) { 1682 val = ctx->work[idxs[nz-1]]; 1683 for (j=0;j<nz-1;j++) { 1684 sum += ctx->work[idxs[j]]; 1685 ctx->work[idxs[j]] += val; 1686 } 1687 } else { 1688 for (j=0;j<nz-1;j++) { 1689 sum += ctx->work[idxs[j]]; 1690 } 1691 } 1692 ctx->work[idxs[nz-1]] -= sum; 1693 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1694 } 1695 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1696 reset_x = PETSC_TRUE; 1697 } 1698 if (transpose) { 1699 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1700 } else { 1701 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1702 } 1703 if (reset_x) { 1704 ierr = VecResetArray(x);CHKERRQ(ierr); 1705 } 1706 if (apply_left) { 1707 PetscScalar *ay; 1708 PetscInt i; 1709 1710 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1711 for (i=0;i<ctx->benign_n;i++) { 1712 PetscScalar sum,val; 1713 const PetscInt *idxs; 1714 PetscInt nz,j; 1715 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1716 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1717 val = -ay[idxs[nz-1]]; 1718 if (ctx->apply_p0) { 1719 sum = 0.; 1720 for (j=0;j<nz-1;j++) { 1721 sum += ay[idxs[j]]; 1722 ay[idxs[j]] += val; 1723 } 1724 ay[idxs[nz-1]] += sum; 1725 } else { 1726 for (j=0;j<nz-1;j++) { 1727 ay[idxs[j]] += val; 1728 } 1729 ay[idxs[nz-1]] = 0.; 1730 } 1731 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1732 } 1733 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1734 } 1735 PetscFunctionReturn(0); 1736 } 1737 1738 #undef __FUNCT__ 1739 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 1740 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1741 { 1742 PetscErrorCode ierr; 1743 1744 PetscFunctionBegin; 1745 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1746 PetscFunctionReturn(0); 1747 } 1748 1749 #undef __FUNCT__ 1750 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 1751 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1752 { 1753 PetscErrorCode ierr; 1754 1755 PetscFunctionBegin; 1756 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1757 PetscFunctionReturn(0); 1758 } 1759 1760 #undef __FUNCT__ 1761 #define __FUNCT__ "PCBDDCBenignShellMat" 1762 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1763 { 1764 PC_IS *pcis = (PC_IS*)pc->data; 1765 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1766 PCBDDCBenignMatMult_ctx ctx; 1767 PetscErrorCode ierr; 1768 1769 PetscFunctionBegin; 1770 if (!restore) { 1771 Mat A_IB,A_BI; 1772 PetscScalar *work; 1773 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1774 1775 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1776 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1777 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1778 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1779 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1780 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1781 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1782 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1783 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1784 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1785 ctx->apply_left = PETSC_TRUE; 1786 ctx->apply_right = PETSC_FALSE; 1787 ctx->apply_p0 = PETSC_FALSE; 1788 ctx->benign_n = pcbddc->benign_n; 1789 if (reuse) { 1790 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1791 ctx->free = PETSC_FALSE; 1792 } else { /* TODO: could be optimized for successive solves */ 1793 ISLocalToGlobalMapping N_to_D; 1794 PetscInt i; 1795 1796 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1797 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1798 for (i=0;i<pcbddc->benign_n;i++) { 1799 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1800 } 1801 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1802 ctx->free = PETSC_TRUE; 1803 } 1804 ctx->A = pcis->A_IB; 1805 ctx->work = work; 1806 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1807 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1808 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1809 pcis->A_IB = A_IB; 1810 1811 /* A_BI as A_IB^T */ 1812 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1813 pcbddc->benign_original_mat = pcis->A_BI; 1814 pcis->A_BI = A_BI; 1815 } else { 1816 if (!pcbddc->benign_original_mat) { 1817 PetscFunctionReturn(0); 1818 } 1819 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1820 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1821 pcis->A_IB = ctx->A; 1822 ctx->A = NULL; 1823 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1824 pcis->A_BI = pcbddc->benign_original_mat; 1825 pcbddc->benign_original_mat = NULL; 1826 if (ctx->free) { 1827 PetscInt i; 1828 for (i=0;i<ctx->benign_n;i++) { 1829 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1830 } 1831 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1832 } 1833 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1834 ierr = PetscFree(ctx);CHKERRQ(ierr); 1835 } 1836 PetscFunctionReturn(0); 1837 } 1838 1839 /* used just in bddc debug mode */ 1840 #undef __FUNCT__ 1841 #define __FUNCT__ "PCBDDCBenignProject" 1842 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1843 { 1844 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1845 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1846 Mat An; 1847 PetscErrorCode ierr; 1848 1849 PetscFunctionBegin; 1850 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1851 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1852 if (is1) { 1853 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1854 ierr = MatDestroy(&An);CHKERRQ(ierr); 1855 } else { 1856 *B = An; 1857 } 1858 PetscFunctionReturn(0); 1859 } 1860 1861 /* TODO: add reuse flag */ 1862 #undef __FUNCT__ 1863 #define __FUNCT__ "MatSeqAIJCompress" 1864 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1865 { 1866 Mat Bt; 1867 PetscScalar *a,*bdata; 1868 const PetscInt *ii,*ij; 1869 PetscInt m,n,i,nnz,*bii,*bij; 1870 PetscBool flg_row; 1871 PetscErrorCode ierr; 1872 1873 PetscFunctionBegin; 1874 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 1875 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1876 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 1877 nnz = n; 1878 for (i=0;i<ii[n];i++) { 1879 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 1880 } 1881 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 1882 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 1883 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 1884 nnz = 0; 1885 bii[0] = 0; 1886 for (i=0;i<n;i++) { 1887 PetscInt j; 1888 for (j=ii[i];j<ii[i+1];j++) { 1889 PetscScalar entry = a[j]; 1890 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 1891 bij[nnz] = ij[j]; 1892 bdata[nnz] = entry; 1893 nnz++; 1894 } 1895 } 1896 bii[i+1] = nnz; 1897 } 1898 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 1899 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 1900 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1901 { 1902 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 1903 b->free_a = PETSC_TRUE; 1904 b->free_ij = PETSC_TRUE; 1905 } 1906 *B = Bt; 1907 PetscFunctionReturn(0); 1908 } 1909 1910 #undef __FUNCT__ 1911 #define __FUNCT__ "MatDetectDisconnectedComponents" 1912 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 1913 { 1914 Mat B; 1915 IS is_dummy,*cc_n; 1916 ISLocalToGlobalMapping l2gmap_dummy; 1917 PCBDDCGraph graph; 1918 PetscInt i,n; 1919 PetscInt *xadj,*adjncy; 1920 PetscInt *xadj_filtered,*adjncy_filtered; 1921 PetscBool flg_row,isseqaij; 1922 PetscErrorCode ierr; 1923 1924 PetscFunctionBegin; 1925 if (!A->rmap->N || !A->cmap->N) { 1926 *ncc = 0; 1927 *cc = NULL; 1928 PetscFunctionReturn(0); 1929 } 1930 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1931 if (!isseqaij && filter) { 1932 PetscBool isseqdense; 1933 1934 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 1935 if (!isseqdense) { 1936 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1937 } else { /* TODO: rectangular case and LDA */ 1938 PetscScalar *array; 1939 PetscReal chop=1.e-6; 1940 1941 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1942 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 1943 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 1944 for (i=0;i<n;i++) { 1945 PetscInt j; 1946 for (j=i+1;j<n;j++) { 1947 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 1948 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 1949 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 1950 } 1951 } 1952 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 1953 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1954 } 1955 } else { 1956 B = A; 1957 } 1958 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1959 1960 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 1961 if (filter) { 1962 PetscScalar *data; 1963 PetscInt j,cum; 1964 1965 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 1966 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 1967 cum = 0; 1968 for (i=0;i<n;i++) { 1969 PetscInt t; 1970 1971 for (j=xadj[i];j<xadj[i+1];j++) { 1972 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 1973 continue; 1974 } 1975 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 1976 } 1977 t = xadj_filtered[i]; 1978 xadj_filtered[i] = cum; 1979 cum += t; 1980 } 1981 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 1982 } else { 1983 xadj_filtered = NULL; 1984 adjncy_filtered = NULL; 1985 } 1986 1987 /* compute local connected components using PCBDDCGraph */ 1988 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 1989 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 1990 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1991 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 1992 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 1993 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 1994 if (xadj_filtered) { 1995 graph->xadj = xadj_filtered; 1996 graph->adjncy = adjncy_filtered; 1997 } else { 1998 graph->xadj = xadj; 1999 graph->adjncy = adjncy; 2000 } 2001 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2002 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2003 /* partial clean up */ 2004 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2005 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2006 if (A != B) { 2007 ierr = MatDestroy(&B);CHKERRQ(ierr); 2008 } 2009 2010 /* get back data */ 2011 if (ncc) *ncc = graph->ncc; 2012 if (cc) { 2013 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2014 for (i=0;i<graph->ncc;i++) { 2015 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2016 } 2017 *cc = cc_n; 2018 } 2019 /* clean up graph */ 2020 graph->xadj = 0; 2021 graph->adjncy = 0; 2022 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2023 PetscFunctionReturn(0); 2024 } 2025 2026 #undef __FUNCT__ 2027 #define __FUNCT__ "PCBDDCBenignCheck" 2028 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2029 { 2030 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2031 PC_IS* pcis = (PC_IS*)(pc->data); 2032 IS dirIS = NULL; 2033 PetscInt i; 2034 PetscErrorCode ierr; 2035 2036 PetscFunctionBegin; 2037 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2038 if (zerodiag) { 2039 Mat A; 2040 Vec vec3_N; 2041 PetscScalar *vals; 2042 const PetscInt *idxs; 2043 PetscInt nz,*count; 2044 2045 /* p0 */ 2046 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2047 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2048 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2049 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2050 for (i=0;i<nz;i++) vals[i] = 1.; 2051 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2052 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2053 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2054 /* v_I */ 2055 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2056 for (i=0;i<nz;i++) vals[i] = 0.; 2057 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2058 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2059 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2060 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2061 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2062 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2063 if (dirIS) { 2064 PetscInt n; 2065 2066 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2067 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2068 for (i=0;i<n;i++) vals[i] = 0.; 2069 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2070 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2071 } 2072 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2073 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2074 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2075 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2076 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2077 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2078 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2079 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2080 ierr = PetscFree(vals);CHKERRQ(ierr); 2081 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2082 2083 /* there should not be any pressure dofs lying on the interface */ 2084 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2085 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2086 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2087 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2088 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2089 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2090 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2091 ierr = PetscFree(count);CHKERRQ(ierr); 2092 } 2093 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2094 2095 /* check PCBDDCBenignGetOrSetP0 */ 2096 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2097 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2098 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2099 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2100 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2101 for (i=0;i<pcbddc->benign_n;i++) { 2102 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2103 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 2104 } 2105 PetscFunctionReturn(0); 2106 } 2107 2108 #undef __FUNCT__ 2109 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 2110 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2111 { 2112 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2113 IS pressures,zerodiag,*zerodiag_subs; 2114 PetscInt nz,n; 2115 PetscInt *interior_dofs,n_interior_dofs; 2116 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 2117 PetscErrorCode ierr; 2118 2119 PetscFunctionBegin; 2120 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2121 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2122 for (n=0;n<pcbddc->benign_n;n++) { 2123 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2124 } 2125 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2126 pcbddc->benign_n = 0; 2127 /* if a local info on dofs is present, assumes that the last field represents "pressures" 2128 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2129 Checks if all the pressure dofs in each subdomain have a zero diagonal 2130 If not, a change of basis on pressures is not needed 2131 since the local Schur complements are already SPD 2132 */ 2133 has_null_pressures = PETSC_TRUE; 2134 have_null = PETSC_TRUE; 2135 if (pcbddc->n_ISForDofsLocal) { 2136 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2137 2138 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2139 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2140 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2141 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2142 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2143 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2144 if (!sorted) { 2145 ierr = ISSort(pressures);CHKERRQ(ierr); 2146 } 2147 } else { 2148 pressures = NULL; 2149 } 2150 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2151 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2152 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2153 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2154 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2155 if (!sorted) { 2156 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2157 } 2158 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2159 if (!nz) { 2160 if (n) have_null = PETSC_FALSE; 2161 has_null_pressures = PETSC_FALSE; 2162 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2163 } 2164 recompute_zerodiag = PETSC_FALSE; 2165 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2166 zerodiag_subs = NULL; 2167 pcbddc->benign_n = 0; 2168 n_interior_dofs = 0; 2169 interior_dofs = NULL; 2170 if (pcbddc->current_level) { /* need to compute interior nodes */ 2171 PetscInt n,i,j; 2172 PetscInt n_neigh,*neigh,*n_shared,**shared; 2173 PetscInt *iwork; 2174 2175 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2176 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2177 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2178 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2179 for (i=1;i<n_neigh;i++) 2180 for (j=0;j<n_shared[i];j++) 2181 iwork[shared[i][j]] += 1; 2182 for (i=0;i<n;i++) 2183 if (!iwork[i]) 2184 interior_dofs[n_interior_dofs++] = i; 2185 ierr = PetscFree(iwork);CHKERRQ(ierr); 2186 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2187 } 2188 if (has_null_pressures) { 2189 IS *subs; 2190 PetscInt nsubs,i,j,nl; 2191 const PetscInt *idxs; 2192 PetscScalar *array; 2193 Vec *work; 2194 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2195 2196 subs = pcbddc->local_subs; 2197 nsubs = pcbddc->n_local_subs; 2198 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2199 if (pcbddc->current_level) { 2200 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2201 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2202 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2203 /* work[0] = 1_p */ 2204 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2205 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2206 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2207 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2208 /* work[0] = 1_v */ 2209 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2210 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2211 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2212 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2213 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2214 } 2215 if (nsubs > 1) { 2216 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2217 for (i=0;i<nsubs;i++) { 2218 ISLocalToGlobalMapping l2g; 2219 IS t_zerodiag_subs; 2220 PetscInt nl; 2221 2222 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2223 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2224 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2225 if (nl) { 2226 PetscBool valid = PETSC_TRUE; 2227 2228 if (pcbddc->current_level) { 2229 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2230 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2231 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2232 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2233 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2234 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2235 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2236 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2237 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2238 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2239 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2240 for (j=0;j<n_interior_dofs;j++) { 2241 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2242 valid = PETSC_FALSE; 2243 break; 2244 } 2245 } 2246 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2247 } 2248 if (valid && pcbddc->NeumannBoundariesLocal) { 2249 IS t_bc; 2250 PetscInt nzb; 2251 2252 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 2253 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 2254 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 2255 if (nzb) valid = PETSC_FALSE; 2256 } 2257 if (valid && pressures) { 2258 IS t_pressure_subs; 2259 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2260 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2261 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2262 } 2263 if (valid) { 2264 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2265 pcbddc->benign_n++; 2266 } else { 2267 recompute_zerodiag = PETSC_TRUE; 2268 } 2269 } 2270 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2271 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2272 } 2273 } else { /* there's just one subdomain (or zero if they have not been detected */ 2274 PetscBool valid = PETSC_TRUE; 2275 2276 if (pcbddc->NeumannBoundariesLocal) { 2277 PetscInt nzb; 2278 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 2279 if (nzb) valid = PETSC_FALSE; 2280 } 2281 if (valid && pressures) { 2282 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2283 } 2284 if (valid && pcbddc->current_level) { 2285 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2286 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2287 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2288 for (j=0;j<n_interior_dofs;j++) { 2289 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2290 valid = PETSC_FALSE; 2291 break; 2292 } 2293 } 2294 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2295 } 2296 if (valid) { 2297 pcbddc->benign_n = 1; 2298 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2299 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2300 zerodiag_subs[0] = zerodiag; 2301 } 2302 } 2303 if (pcbddc->current_level) { 2304 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2305 } 2306 } 2307 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2308 2309 if (!pcbddc->benign_n) { 2310 PetscInt n; 2311 2312 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2313 recompute_zerodiag = PETSC_FALSE; 2314 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2315 if (n) { 2316 has_null_pressures = PETSC_FALSE; 2317 have_null = PETSC_FALSE; 2318 } 2319 } 2320 2321 /* final check for null pressures */ 2322 if (zerodiag && pressures) { 2323 PetscInt nz,np; 2324 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2325 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2326 if (nz != np) have_null = PETSC_FALSE; 2327 } 2328 2329 if (recompute_zerodiag) { 2330 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2331 if (pcbddc->benign_n == 1) { 2332 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2333 zerodiag = zerodiag_subs[0]; 2334 } else { 2335 PetscInt i,nzn,*new_idxs; 2336 2337 nzn = 0; 2338 for (i=0;i<pcbddc->benign_n;i++) { 2339 PetscInt ns; 2340 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2341 nzn += ns; 2342 } 2343 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2344 nzn = 0; 2345 for (i=0;i<pcbddc->benign_n;i++) { 2346 PetscInt ns,*idxs; 2347 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2348 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2349 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2350 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2351 nzn += ns; 2352 } 2353 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2354 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2355 } 2356 have_null = PETSC_FALSE; 2357 } 2358 2359 /* Prepare matrix to compute no-net-flux */ 2360 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2361 Mat A,loc_divudotp; 2362 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2363 IS row,col,isused = NULL; 2364 PetscInt M,N,n,st,n_isused; 2365 2366 if (pressures) { 2367 isused = pressures; 2368 } else { 2369 isused = zerodiag; 2370 } 2371 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2372 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2373 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2374 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2375 n_isused = 0; 2376 if (isused) { 2377 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2378 } 2379 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2380 st = st-n_isused; 2381 if (n) { 2382 const PetscInt *gidxs; 2383 2384 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2385 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2386 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2387 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2388 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2389 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2390 } else { 2391 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2392 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2393 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2394 } 2395 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2396 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2397 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2398 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2399 ierr = ISDestroy(&row);CHKERRQ(ierr); 2400 ierr = ISDestroy(&col);CHKERRQ(ierr); 2401 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2402 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2403 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2404 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2405 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2406 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2407 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2408 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2409 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2410 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2411 } 2412 2413 /* change of basis and p0 dofs */ 2414 if (has_null_pressures) { 2415 IS zerodiagc; 2416 const PetscInt *idxs,*idxsc; 2417 PetscInt i,s,*nnz; 2418 2419 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2420 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2421 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2422 /* local change of basis for pressures */ 2423 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2424 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2425 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2426 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2427 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2428 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2429 for (i=0;i<pcbddc->benign_n;i++) { 2430 PetscInt nzs,j; 2431 2432 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2433 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2434 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2435 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2436 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2437 } 2438 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2439 ierr = PetscFree(nnz);CHKERRQ(ierr); 2440 /* set identity on velocities */ 2441 for (i=0;i<n-nz;i++) { 2442 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2443 } 2444 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2445 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2446 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2447 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2448 /* set change on pressures */ 2449 for (s=0;s<pcbddc->benign_n;s++) { 2450 PetscScalar *array; 2451 PetscInt nzs; 2452 2453 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2454 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2455 for (i=0;i<nzs-1;i++) { 2456 PetscScalar vals[2]; 2457 PetscInt cols[2]; 2458 2459 cols[0] = idxs[i]; 2460 cols[1] = idxs[nzs-1]; 2461 vals[0] = 1.; 2462 vals[1] = 1.; 2463 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2464 } 2465 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2466 for (i=0;i<nzs-1;i++) array[i] = -1.; 2467 array[nzs-1] = 1.; 2468 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2469 /* store local idxs for p0 */ 2470 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2471 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2472 ierr = PetscFree(array);CHKERRQ(ierr); 2473 } 2474 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2475 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2476 /* project if needed */ 2477 if (pcbddc->benign_change_explicit) { 2478 Mat M; 2479 2480 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2481 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2482 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2483 ierr = MatDestroy(&M);CHKERRQ(ierr); 2484 } 2485 /* store global idxs for p0 */ 2486 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2487 } 2488 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2489 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2490 2491 /* determines if the coarse solver will be singular or not */ 2492 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2493 /* determines if the problem has subdomains with 0 pressure block */ 2494 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2495 *zerodiaglocal = zerodiag; 2496 PetscFunctionReturn(0); 2497 } 2498 2499 #undef __FUNCT__ 2500 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 2501 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2502 { 2503 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2504 PetscScalar *array; 2505 PetscErrorCode ierr; 2506 2507 PetscFunctionBegin; 2508 if (!pcbddc->benign_sf) { 2509 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2510 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2511 } 2512 if (get) { 2513 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2514 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2515 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2516 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2517 } else { 2518 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2519 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2520 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2521 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2522 } 2523 PetscFunctionReturn(0); 2524 } 2525 2526 #undef __FUNCT__ 2527 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 2528 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2529 { 2530 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2531 PetscErrorCode ierr; 2532 2533 PetscFunctionBegin; 2534 /* TODO: add error checking 2535 - avoid nested pop (or push) calls. 2536 - cannot push before pop. 2537 - cannot call this if pcbddc->local_mat is NULL 2538 */ 2539 if (!pcbddc->benign_n) { 2540 PetscFunctionReturn(0); 2541 } 2542 if (pop) { 2543 if (pcbddc->benign_change_explicit) { 2544 IS is_p0; 2545 MatReuse reuse; 2546 2547 /* extract B_0 */ 2548 reuse = MAT_INITIAL_MATRIX; 2549 if (pcbddc->benign_B0) { 2550 reuse = MAT_REUSE_MATRIX; 2551 } 2552 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2553 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2554 /* remove rows and cols from local problem */ 2555 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2556 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2557 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2558 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2559 } else { 2560 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2561 PetscScalar *vals; 2562 PetscInt i,n,*idxs_ins; 2563 2564 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2565 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2566 if (!pcbddc->benign_B0) { 2567 PetscInt *nnz; 2568 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2569 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2570 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2571 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2572 for (i=0;i<pcbddc->benign_n;i++) { 2573 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2574 nnz[i] = n - nnz[i]; 2575 } 2576 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2577 ierr = PetscFree(nnz);CHKERRQ(ierr); 2578 } 2579 2580 for (i=0;i<pcbddc->benign_n;i++) { 2581 PetscScalar *array; 2582 PetscInt *idxs,j,nz,cum; 2583 2584 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2585 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2586 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2587 for (j=0;j<nz;j++) vals[j] = 1.; 2588 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2589 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2590 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2591 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2592 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2593 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2594 cum = 0; 2595 for (j=0;j<n;j++) { 2596 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2597 vals[cum] = array[j]; 2598 idxs_ins[cum] = j; 2599 cum++; 2600 } 2601 } 2602 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2603 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2604 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2605 } 2606 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2607 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2608 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2609 } 2610 } else { /* push */ 2611 if (pcbddc->benign_change_explicit) { 2612 PetscInt i; 2613 2614 for (i=0;i<pcbddc->benign_n;i++) { 2615 PetscScalar *B0_vals; 2616 PetscInt *B0_cols,B0_ncol; 2617 2618 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2619 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2620 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2621 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2622 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2623 } 2624 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2625 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2626 } else { 2627 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2628 } 2629 } 2630 PetscFunctionReturn(0); 2631 } 2632 2633 #undef __FUNCT__ 2634 #define __FUNCT__ "PCBDDCAdaptiveSelection" 2635 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2636 { 2637 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2638 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2639 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2640 PetscBLASInt *B_iwork,*B_ifail; 2641 PetscScalar *work,lwork; 2642 PetscScalar *St,*S,*eigv; 2643 PetscScalar *Sarray,*Starray; 2644 PetscReal *eigs,thresh; 2645 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2646 PetscBool allocated_S_St; 2647 #if defined(PETSC_USE_COMPLEX) 2648 PetscReal *rwork; 2649 #endif 2650 PetscErrorCode ierr; 2651 2652 PetscFunctionBegin; 2653 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2654 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2655 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2656 2657 if (pcbddc->dbg_flag) { 2658 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2659 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2660 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2661 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2662 } 2663 2664 if (pcbddc->dbg_flag) { 2665 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2666 } 2667 2668 /* max size of subsets */ 2669 mss = 0; 2670 for (i=0;i<sub_schurs->n_subs;i++) { 2671 PetscInt subset_size; 2672 2673 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2674 mss = PetscMax(mss,subset_size); 2675 } 2676 2677 /* min/max and threshold */ 2678 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2679 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2680 nmax = PetscMax(nmin,nmax); 2681 allocated_S_St = PETSC_FALSE; 2682 if (nmin) { 2683 allocated_S_St = PETSC_TRUE; 2684 } 2685 2686 /* allocate lapack workspace */ 2687 cum = cum2 = 0; 2688 maxneigs = 0; 2689 for (i=0;i<sub_schurs->n_subs;i++) { 2690 PetscInt n,subset_size; 2691 2692 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2693 n = PetscMin(subset_size,nmax); 2694 cum += subset_size; 2695 cum2 += subset_size*n; 2696 maxneigs = PetscMax(maxneigs,n); 2697 } 2698 if (mss) { 2699 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2700 PetscBLASInt B_itype = 1; 2701 PetscBLASInt B_N = mss; 2702 PetscReal zero = 0.0; 2703 PetscReal eps = 0.0; /* dlamch? */ 2704 2705 B_lwork = -1; 2706 S = NULL; 2707 St = NULL; 2708 eigs = NULL; 2709 eigv = NULL; 2710 B_iwork = NULL; 2711 B_ifail = NULL; 2712 #if defined(PETSC_USE_COMPLEX) 2713 rwork = NULL; 2714 #endif 2715 thresh = 1.0; 2716 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2717 #if defined(PETSC_USE_COMPLEX) 2718 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2719 #else 2720 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2721 #endif 2722 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2723 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2724 } else { 2725 /* TODO */ 2726 } 2727 } else { 2728 lwork = 0; 2729 } 2730 2731 nv = 0; 2732 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2733 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2734 } 2735 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2736 if (allocated_S_St) { 2737 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2738 } 2739 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2740 #if defined(PETSC_USE_COMPLEX) 2741 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2742 #endif 2743 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2744 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2745 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2746 nv+cum,&pcbddc->adaptive_constraints_idxs, 2747 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2748 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2749 2750 maxneigs = 0; 2751 cum = cumarray = 0; 2752 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2753 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2754 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2755 const PetscInt *idxs; 2756 2757 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2758 for (cum=0;cum<nv;cum++) { 2759 pcbddc->adaptive_constraints_n[cum] = 1; 2760 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2761 pcbddc->adaptive_constraints_data[cum] = 1.0; 2762 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2763 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2764 } 2765 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2766 } 2767 2768 if (mss) { /* multilevel */ 2769 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2770 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2771 } 2772 2773 thresh = pcbddc->adaptive_threshold; 2774 for (i=0;i<sub_schurs->n_subs;i++) { 2775 const PetscInt *idxs; 2776 PetscReal upper,lower; 2777 PetscInt j,subset_size,eigs_start = 0; 2778 PetscBLASInt B_N; 2779 PetscBool same_data = PETSC_FALSE; 2780 2781 if (pcbddc->use_deluxe_scaling) { 2782 upper = PETSC_MAX_REAL; 2783 lower = thresh; 2784 } else { 2785 upper = 1./thresh; 2786 lower = 0.; 2787 } 2788 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2789 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2790 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2791 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2792 if (sub_schurs->is_hermitian) { 2793 PetscInt j,k; 2794 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2795 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2796 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2797 } 2798 for (j=0;j<subset_size;j++) { 2799 for (k=j;k<subset_size;k++) { 2800 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2801 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2802 } 2803 } 2804 } else { 2805 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2806 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2807 } 2808 } else { 2809 S = Sarray + cumarray; 2810 St = Starray + cumarray; 2811 } 2812 /* see if we can save some work */ 2813 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2814 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2815 } 2816 2817 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2818 B_neigs = 0; 2819 } else { 2820 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2821 PetscBLASInt B_itype = 1; 2822 PetscBLASInt B_IL, B_IU; 2823 PetscReal eps = -1.0; /* dlamch? */ 2824 PetscInt nmin_s; 2825 PetscBool compute_range = PETSC_FALSE; 2826 2827 if (pcbddc->dbg_flag) { 2828 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]]); 2829 } 2830 2831 compute_range = PETSC_FALSE; 2832 if (thresh > 1.+PETSC_SMALL && !same_data) { 2833 compute_range = PETSC_TRUE; 2834 } 2835 2836 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2837 if (compute_range) { 2838 2839 /* ask for eigenvalues larger than thresh */ 2840 #if defined(PETSC_USE_COMPLEX) 2841 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2842 #else 2843 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2844 #endif 2845 } else if (!same_data) { 2846 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2847 B_IL = 1; 2848 #if defined(PETSC_USE_COMPLEX) 2849 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2850 #else 2851 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2852 #endif 2853 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 2854 PetscInt k; 2855 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2856 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2857 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2858 nmin = nmax; 2859 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2860 for (k=0;k<nmax;k++) { 2861 eigs[k] = 1./PETSC_SMALL; 2862 eigv[k*(subset_size+1)] = 1.0; 2863 } 2864 } 2865 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2866 if (B_ierr) { 2867 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2868 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2869 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2870 } 2871 2872 if (B_neigs > nmax) { 2873 if (pcbddc->dbg_flag) { 2874 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 2875 } 2876 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 2877 B_neigs = nmax; 2878 } 2879 2880 nmin_s = PetscMin(nmin,B_N); 2881 if (B_neigs < nmin_s) { 2882 PetscBLASInt B_neigs2; 2883 2884 if (pcbddc->use_deluxe_scaling) { 2885 B_IL = B_N - nmin_s + 1; 2886 B_IU = B_N - B_neigs; 2887 } else { 2888 B_IL = B_neigs + 1; 2889 B_IU = nmin_s; 2890 } 2891 if (pcbddc->dbg_flag) { 2892 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 2893 } 2894 if (sub_schurs->is_hermitian) { 2895 PetscInt j,k; 2896 for (j=0;j<subset_size;j++) { 2897 for (k=j;k<subset_size;k++) { 2898 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2899 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2900 } 2901 } 2902 } else { 2903 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2904 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2905 } 2906 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2907 #if defined(PETSC_USE_COMPLEX) 2908 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2909 #else 2910 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2911 #endif 2912 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2913 B_neigs += B_neigs2; 2914 } 2915 if (B_ierr) { 2916 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2917 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2918 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2919 } 2920 if (pcbddc->dbg_flag) { 2921 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 2922 for (j=0;j<B_neigs;j++) { 2923 if (eigs[j] == 0.0) { 2924 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 2925 } else { 2926 if (pcbddc->use_deluxe_scaling) { 2927 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 2928 } else { 2929 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 2930 } 2931 } 2932 } 2933 } 2934 } else { 2935 /* TODO */ 2936 } 2937 } 2938 /* change the basis back to the original one */ 2939 if (sub_schurs->change) { 2940 Mat change,phi,phit; 2941 2942 if (pcbddc->dbg_flag > 1) { 2943 PetscInt ii; 2944 for (ii=0;ii<B_neigs;ii++) { 2945 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2946 for (j=0;j<B_N;j++) { 2947 #if defined(PETSC_USE_COMPLEX) 2948 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 2949 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 2950 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2951 #else 2952 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 2953 #endif 2954 } 2955 } 2956 } 2957 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 2958 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 2959 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 2960 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2961 ierr = MatDestroy(&phit);CHKERRQ(ierr); 2962 ierr = MatDestroy(&phi);CHKERRQ(ierr); 2963 } 2964 maxneigs = PetscMax(B_neigs,maxneigs); 2965 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 2966 if (B_neigs) { 2967 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2968 2969 if (pcbddc->dbg_flag > 1) { 2970 PetscInt ii; 2971 for (ii=0;ii<B_neigs;ii++) { 2972 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2973 for (j=0;j<B_N;j++) { 2974 #if defined(PETSC_USE_COMPLEX) 2975 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2976 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2977 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2978 #else 2979 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 2980 #endif 2981 } 2982 } 2983 } 2984 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 2985 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 2986 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 2987 cum++; 2988 } 2989 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2990 /* shift for next computation */ 2991 cumarray += subset_size*subset_size; 2992 } 2993 if (pcbddc->dbg_flag) { 2994 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2995 } 2996 2997 if (mss) { 2998 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2999 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3000 /* destroy matrices (junk) */ 3001 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3002 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3003 } 3004 if (allocated_S_St) { 3005 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3006 } 3007 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3008 #if defined(PETSC_USE_COMPLEX) 3009 ierr = PetscFree(rwork);CHKERRQ(ierr); 3010 #endif 3011 if (pcbddc->dbg_flag) { 3012 PetscInt maxneigs_r; 3013 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3014 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3015 } 3016 PetscFunctionReturn(0); 3017 } 3018 3019 #undef __FUNCT__ 3020 #define __FUNCT__ "PCBDDCSetUpSolvers" 3021 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3022 { 3023 PetscScalar *coarse_submat_vals; 3024 PetscErrorCode ierr; 3025 3026 PetscFunctionBegin; 3027 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3028 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3029 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3030 3031 /* Setup local neumann solver ksp_R */ 3032 /* PCBDDCSetUpLocalScatters should be called first! */ 3033 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3034 3035 /* 3036 Setup local correction and local part of coarse basis. 3037 Gives back the dense local part of the coarse matrix in column major ordering 3038 */ 3039 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3040 3041 /* Compute total number of coarse nodes and setup coarse solver */ 3042 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3043 3044 /* free */ 3045 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3046 PetscFunctionReturn(0); 3047 } 3048 3049 #undef __FUNCT__ 3050 #define __FUNCT__ "PCBDDCResetCustomization" 3051 PetscErrorCode PCBDDCResetCustomization(PC pc) 3052 { 3053 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3054 PetscErrorCode ierr; 3055 3056 PetscFunctionBegin; 3057 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3058 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3059 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3060 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3061 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3062 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3063 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3064 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3065 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3066 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3067 PetscFunctionReturn(0); 3068 } 3069 3070 #undef __FUNCT__ 3071 #define __FUNCT__ "PCBDDCResetTopography" 3072 PetscErrorCode PCBDDCResetTopography(PC pc) 3073 { 3074 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3075 PetscInt i; 3076 PetscErrorCode ierr; 3077 3078 PetscFunctionBegin; 3079 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3080 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3081 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3082 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3083 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3084 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3085 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3086 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3087 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3088 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3089 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3090 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3091 for (i=0;i<pcbddc->n_local_subs;i++) { 3092 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3093 } 3094 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3095 if (pcbddc->sub_schurs) { 3096 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 3097 } 3098 pcbddc->graphanalyzed = PETSC_FALSE; 3099 pcbddc->recompute_topography = PETSC_TRUE; 3100 PetscFunctionReturn(0); 3101 } 3102 3103 #undef __FUNCT__ 3104 #define __FUNCT__ "PCBDDCResetSolvers" 3105 PetscErrorCode PCBDDCResetSolvers(PC pc) 3106 { 3107 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3108 PetscErrorCode ierr; 3109 3110 PetscFunctionBegin; 3111 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3112 if (pcbddc->coarse_phi_B) { 3113 PetscScalar *array; 3114 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3115 ierr = PetscFree(array);CHKERRQ(ierr); 3116 } 3117 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3118 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3119 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3120 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3121 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3122 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3123 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3124 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3125 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3126 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3127 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3128 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3129 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3130 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3131 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 3132 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 3133 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 3134 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3135 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3136 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3137 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3138 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3139 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3140 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3141 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3142 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3143 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3144 if (pcbddc->benign_zerodiag_subs) { 3145 PetscInt i; 3146 for (i=0;i<pcbddc->benign_n;i++) { 3147 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3148 } 3149 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3150 } 3151 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3152 PetscFunctionReturn(0); 3153 } 3154 3155 #undef __FUNCT__ 3156 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 3157 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3158 { 3159 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3160 PC_IS *pcis = (PC_IS*)pc->data; 3161 VecType impVecType; 3162 PetscInt n_constraints,n_R,old_size; 3163 PetscErrorCode ierr; 3164 3165 PetscFunctionBegin; 3166 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 3167 /* get sizes */ 3168 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3169 n_R = pcis->n - pcbddc->n_vertices; 3170 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3171 /* local work vectors (try to avoid unneeded work)*/ 3172 /* R nodes */ 3173 old_size = -1; 3174 if (pcbddc->vec1_R) { 3175 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3176 } 3177 if (n_R != old_size) { 3178 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3179 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3180 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3181 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3182 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3183 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3184 } 3185 /* local primal dofs */ 3186 old_size = -1; 3187 if (pcbddc->vec1_P) { 3188 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3189 } 3190 if (pcbddc->local_primal_size != old_size) { 3191 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3192 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3193 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3194 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3195 } 3196 /* local explicit constraints */ 3197 old_size = -1; 3198 if (pcbddc->vec1_C) { 3199 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3200 } 3201 if (n_constraints && n_constraints != old_size) { 3202 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3203 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3204 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3205 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3206 } 3207 PetscFunctionReturn(0); 3208 } 3209 3210 #undef __FUNCT__ 3211 #define __FUNCT__ "PCBDDCSetUpCorrection" 3212 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3213 { 3214 PetscErrorCode ierr; 3215 /* pointers to pcis and pcbddc */ 3216 PC_IS* pcis = (PC_IS*)pc->data; 3217 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3218 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3219 /* submatrices of local problem */ 3220 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3221 /* submatrices of local coarse problem */ 3222 Mat S_VV,S_CV,S_VC,S_CC; 3223 /* working matrices */ 3224 Mat C_CR; 3225 /* additional working stuff */ 3226 PC pc_R; 3227 Mat F; 3228 Vec dummy_vec; 3229 PetscBool isLU,isCHOL,isILU,need_benign_correction; 3230 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3231 PetscScalar *work; 3232 PetscInt *idx_V_B; 3233 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3234 PetscInt i,n_R,n_D,n_B; 3235 3236 /* some shortcuts to scalars */ 3237 PetscScalar one=1.0,m_one=-1.0; 3238 3239 PetscFunctionBegin; 3240 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3241 3242 /* Set Non-overlapping dimensions */ 3243 n_vertices = pcbddc->n_vertices; 3244 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3245 n_B = pcis->n_B; 3246 n_D = pcis->n - n_B; 3247 n_R = pcis->n - n_vertices; 3248 3249 /* vertices in boundary numbering */ 3250 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3251 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3252 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3253 3254 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3255 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3256 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3257 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3258 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3259 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3260 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3261 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3262 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3263 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3264 3265 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3266 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3267 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3268 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3269 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3270 lda_rhs = n_R; 3271 need_benign_correction = PETSC_FALSE; 3272 if (isLU || isILU || isCHOL) { 3273 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3274 } else if (sub_schurs && sub_schurs->reuse_solver) { 3275 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3276 MatFactorType type; 3277 3278 F = reuse_solver->F; 3279 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3280 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3281 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3282 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3283 } else { 3284 F = NULL; 3285 } 3286 3287 /* allocate workspace */ 3288 n = 0; 3289 if (n_constraints) { 3290 n += lda_rhs*n_constraints; 3291 } 3292 if (n_vertices) { 3293 n = PetscMax(2*lda_rhs*n_vertices,n); 3294 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3295 } 3296 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3297 3298 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3299 dummy_vec = NULL; 3300 if (need_benign_correction && lda_rhs != n_R && F) { 3301 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3302 } 3303 3304 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3305 if (n_constraints) { 3306 Mat M1,M2,M3,C_B; 3307 IS is_aux; 3308 PetscScalar *array,*array2; 3309 3310 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3311 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3312 3313 /* Extract constraints on R nodes: C_{CR} */ 3314 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3315 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3316 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3317 3318 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3319 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3320 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3321 for (i=0;i<n_constraints;i++) { 3322 const PetscScalar *row_cmat_values; 3323 const PetscInt *row_cmat_indices; 3324 PetscInt size_of_constraint,j; 3325 3326 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3327 for (j=0;j<size_of_constraint;j++) { 3328 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3329 } 3330 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3331 } 3332 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3333 if (F) { 3334 Mat B; 3335 3336 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3337 if (need_benign_correction) { 3338 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3339 3340 /* rhs is already zero on interior dofs, no need to change the rhs */ 3341 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3342 } 3343 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 3344 if (need_benign_correction) { 3345 PetscScalar *marr; 3346 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3347 3348 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3349 if (lda_rhs != n_R) { 3350 for (i=0;i<n_constraints;i++) { 3351 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3352 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3353 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3354 } 3355 } else { 3356 for (i=0;i<n_constraints;i++) { 3357 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3358 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3359 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3360 } 3361 } 3362 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3363 } 3364 ierr = MatDestroy(&B);CHKERRQ(ierr); 3365 } else { 3366 PetscScalar *marr; 3367 3368 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3369 for (i=0;i<n_constraints;i++) { 3370 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3371 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3372 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3373 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3374 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3375 } 3376 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3377 } 3378 if (!pcbddc->switch_static) { 3379 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3380 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3381 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3382 for (i=0;i<n_constraints;i++) { 3383 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3384 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3385 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3386 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3387 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3388 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3389 } 3390 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3391 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3392 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3393 } else { 3394 if (lda_rhs != n_R) { 3395 IS dummy; 3396 3397 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3398 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3399 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3400 } else { 3401 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3402 pcbddc->local_auxmat2 = local_auxmat2_R; 3403 } 3404 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3405 } 3406 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3407 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3408 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3409 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3410 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3411 if (isCHOL) { 3412 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3413 } else { 3414 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3415 } 3416 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3417 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3418 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3419 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3420 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3421 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3422 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3423 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3424 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3425 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3426 } 3427 3428 /* Get submatrices from subdomain matrix */ 3429 if (n_vertices) { 3430 IS is_aux; 3431 3432 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3433 IS tis; 3434 3435 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3436 ierr = ISSort(tis);CHKERRQ(ierr); 3437 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3438 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3439 } else { 3440 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3441 } 3442 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3443 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3444 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3445 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3446 } 3447 3448 /* Matrix of coarse basis functions (local) */ 3449 if (pcbddc->coarse_phi_B) { 3450 PetscInt on_B,on_primal,on_D=n_D; 3451 if (pcbddc->coarse_phi_D) { 3452 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3453 } 3454 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3455 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3456 PetscScalar *marray; 3457 3458 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3459 ierr = PetscFree(marray);CHKERRQ(ierr); 3460 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3461 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3462 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3463 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3464 } 3465 } 3466 3467 if (!pcbddc->coarse_phi_B) { 3468 PetscScalar *marray; 3469 3470 n = n_B*pcbddc->local_primal_size; 3471 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3472 n += n_D*pcbddc->local_primal_size; 3473 } 3474 if (!pcbddc->symmetric_primal) { 3475 n *= 2; 3476 } 3477 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 3478 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3479 n = n_B*pcbddc->local_primal_size; 3480 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3481 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3482 n += n_D*pcbddc->local_primal_size; 3483 } 3484 if (!pcbddc->symmetric_primal) { 3485 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3486 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3487 n = n_B*pcbddc->local_primal_size; 3488 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3489 } 3490 } else { 3491 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3492 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3493 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3494 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3495 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3496 } 3497 } 3498 } 3499 3500 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3501 p0_lidx_I = NULL; 3502 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3503 const PetscInt *idxs; 3504 3505 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3506 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3507 for (i=0;i<pcbddc->benign_n;i++) { 3508 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3509 } 3510 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3511 } 3512 3513 /* vertices */ 3514 if (n_vertices) { 3515 3516 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3517 3518 if (n_R) { 3519 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3520 PetscBLASInt B_N,B_one = 1; 3521 PetscScalar *x,*y; 3522 PetscBool isseqaij; 3523 3524 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3525 if (need_benign_correction) { 3526 ISLocalToGlobalMapping RtoN; 3527 IS is_p0; 3528 PetscInt *idxs_p0,n; 3529 3530 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3531 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3532 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3533 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3534 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3535 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3536 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3537 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3538 } 3539 3540 if (lda_rhs == n_R) { 3541 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3542 } else { 3543 PetscScalar *av,*array; 3544 const PetscInt *xadj,*adjncy; 3545 PetscInt n; 3546 PetscBool flg_row; 3547 3548 array = work+lda_rhs*n_vertices; 3549 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3550 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3551 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3552 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3553 for (i=0;i<n;i++) { 3554 PetscInt j; 3555 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3556 } 3557 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3558 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3559 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3560 } 3561 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3562 if (need_benign_correction) { 3563 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3564 PetscScalar *marr; 3565 3566 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3567 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3568 3569 | 0 0 0 | (V) 3570 L = | 0 0 -1 | (P-p0) 3571 | 0 0 -1 | (p0) 3572 3573 */ 3574 for (i=0;i<reuse_solver->benign_n;i++) { 3575 const PetscScalar *vals; 3576 const PetscInt *idxs,*idxs_zero; 3577 PetscInt n,j,nz; 3578 3579 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3580 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3581 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3582 for (j=0;j<n;j++) { 3583 PetscScalar val = vals[j]; 3584 PetscInt k,col = idxs[j]; 3585 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3586 } 3587 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3588 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3589 } 3590 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3591 } 3592 if (F) { 3593 /* need to correct the rhs */ 3594 if (need_benign_correction) { 3595 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3596 PetscScalar *marr; 3597 3598 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3599 if (lda_rhs != n_R) { 3600 for (i=0;i<n_vertices;i++) { 3601 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3602 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3603 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3604 } 3605 } else { 3606 for (i=0;i<n_vertices;i++) { 3607 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3608 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3609 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3610 } 3611 } 3612 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3613 } 3614 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3615 /* need to correct the solution */ 3616 if (need_benign_correction) { 3617 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3618 PetscScalar *marr; 3619 3620 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3621 if (lda_rhs != n_R) { 3622 for (i=0;i<n_vertices;i++) { 3623 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3624 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3625 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3626 } 3627 } else { 3628 for (i=0;i<n_vertices;i++) { 3629 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3630 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3631 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3632 } 3633 } 3634 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3635 } 3636 } else { 3637 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3638 for (i=0;i<n_vertices;i++) { 3639 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3640 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3641 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3642 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3643 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3644 } 3645 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3646 } 3647 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3648 /* S_VV and S_CV */ 3649 if (n_constraints) { 3650 Mat B; 3651 3652 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3653 for (i=0;i<n_vertices;i++) { 3654 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3655 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3656 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3657 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3658 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3659 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3660 } 3661 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3662 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3663 ierr = MatDestroy(&B);CHKERRQ(ierr); 3664 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3665 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3666 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3667 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3668 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3669 ierr = MatDestroy(&B);CHKERRQ(ierr); 3670 } 3671 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3672 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3673 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3674 } 3675 if (lda_rhs != n_R) { 3676 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3677 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3678 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3679 } 3680 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3681 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3682 if (need_benign_correction) { 3683 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3684 PetscScalar *marr,*sums; 3685 3686 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3687 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 3688 for (i=0;i<reuse_solver->benign_n;i++) { 3689 const PetscScalar *vals; 3690 const PetscInt *idxs,*idxs_zero; 3691 PetscInt n,j,nz; 3692 3693 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3694 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3695 for (j=0;j<n_vertices;j++) { 3696 PetscInt k; 3697 sums[j] = 0.; 3698 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3699 } 3700 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3701 for (j=0;j<n;j++) { 3702 PetscScalar val = vals[j]; 3703 PetscInt k; 3704 for (k=0;k<n_vertices;k++) { 3705 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3706 } 3707 } 3708 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3709 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3710 } 3711 ierr = PetscFree(sums);CHKERRQ(ierr); 3712 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 3713 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3714 } 3715 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3716 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3717 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3718 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3719 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3720 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3721 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3722 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3723 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3724 } else { 3725 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3726 } 3727 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3728 3729 /* coarse basis functions */ 3730 for (i=0;i<n_vertices;i++) { 3731 PetscScalar *y; 3732 3733 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3734 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3735 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3736 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3737 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3738 y[n_B*i+idx_V_B[i]] = 1.0; 3739 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3740 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3741 3742 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3743 PetscInt j; 3744 3745 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3746 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3747 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3748 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3749 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3750 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3751 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3752 } 3753 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3754 } 3755 /* if n_R == 0 the object is not destroyed */ 3756 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3757 } 3758 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3759 3760 if (n_constraints) { 3761 Mat B; 3762 3763 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3764 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3765 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3766 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3767 if (n_vertices) { 3768 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3769 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3770 } else { 3771 Mat S_VCt; 3772 3773 if (lda_rhs != n_R) { 3774 ierr = MatDestroy(&B);CHKERRQ(ierr); 3775 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3776 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3777 } 3778 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3779 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3780 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3781 } 3782 } 3783 ierr = MatDestroy(&B);CHKERRQ(ierr); 3784 /* coarse basis functions */ 3785 for (i=0;i<n_constraints;i++) { 3786 PetscScalar *y; 3787 3788 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3789 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3790 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3791 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3792 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3793 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3794 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3795 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3796 PetscInt j; 3797 3798 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3799 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3800 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3801 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3802 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3803 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3804 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3805 } 3806 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3807 } 3808 } 3809 if (n_constraints) { 3810 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3811 } 3812 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3813 3814 /* coarse matrix entries relative to B_0 */ 3815 if (pcbddc->benign_n) { 3816 Mat B0_B,B0_BPHI; 3817 IS is_dummy; 3818 PetscScalar *data; 3819 PetscInt j; 3820 3821 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3822 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3823 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3824 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3825 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3826 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3827 for (j=0;j<pcbddc->benign_n;j++) { 3828 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3829 for (i=0;i<pcbddc->local_primal_size;i++) { 3830 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3831 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3832 } 3833 } 3834 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3835 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3836 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3837 } 3838 3839 /* compute other basis functions for non-symmetric problems */ 3840 if (!pcbddc->symmetric_primal) { 3841 Mat B_V=NULL,B_C=NULL; 3842 PetscScalar *marray; 3843 3844 if (n_constraints) { 3845 Mat S_CCT,C_CRT; 3846 3847 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 3848 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 3849 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 3850 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 3851 if (n_vertices) { 3852 Mat S_VCT; 3853 3854 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 3855 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 3856 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 3857 } 3858 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 3859 } else { 3860 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 3861 } 3862 if (n_vertices && n_R) { 3863 PetscScalar *av,*marray; 3864 const PetscInt *xadj,*adjncy; 3865 PetscInt n; 3866 PetscBool flg_row; 3867 3868 /* B_V = B_V - A_VR^T */ 3869 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3870 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3871 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 3872 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3873 for (i=0;i<n;i++) { 3874 PetscInt j; 3875 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 3876 } 3877 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3878 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3879 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3880 } 3881 3882 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 3883 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3884 for (i=0;i<n_vertices;i++) { 3885 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 3886 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3887 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3888 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3889 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3890 } 3891 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3892 if (B_C) { 3893 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 3894 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 3895 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 3896 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3897 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3898 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3899 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3900 } 3901 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 3902 } 3903 /* coarse basis functions */ 3904 for (i=0;i<pcbddc->local_primal_size;i++) { 3905 PetscScalar *y; 3906 3907 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 3908 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3909 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3910 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3911 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3912 if (i<n_vertices) { 3913 y[n_B*i+idx_V_B[i]] = 1.0; 3914 } 3915 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3916 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3917 3918 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3919 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3920 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3921 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3922 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3923 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3924 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3925 } 3926 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3927 } 3928 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 3929 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 3930 } 3931 /* free memory */ 3932 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3933 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 3934 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 3935 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 3936 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 3937 ierr = PetscFree(work);CHKERRQ(ierr); 3938 if (n_vertices) { 3939 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3940 } 3941 if (n_constraints) { 3942 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3943 } 3944 /* Checking coarse_sub_mat and coarse basis functios */ 3945 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3946 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3947 if (pcbddc->dbg_flag) { 3948 Mat coarse_sub_mat; 3949 Mat AUXMAT,TM1,TM2,TM3,TM4; 3950 Mat coarse_phi_D,coarse_phi_B; 3951 Mat coarse_psi_D,coarse_psi_B; 3952 Mat A_II,A_BB,A_IB,A_BI; 3953 Mat C_B,CPHI; 3954 IS is_dummy; 3955 Vec mones; 3956 MatType checkmattype=MATSEQAIJ; 3957 PetscReal real_value; 3958 3959 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3960 Mat A; 3961 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 3962 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3963 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3964 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3965 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3966 ierr = MatDestroy(&A);CHKERRQ(ierr); 3967 } else { 3968 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3969 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3970 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3971 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3972 } 3973 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3974 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3975 if (!pcbddc->symmetric_primal) { 3976 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 3977 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 3978 } 3979 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3980 3981 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3982 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 3983 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3984 if (!pcbddc->symmetric_primal) { 3985 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3986 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3987 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3988 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3989 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3990 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3991 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3992 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3993 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3994 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3995 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3996 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3997 } else { 3998 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3999 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4000 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4001 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4002 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4003 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4004 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4005 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4006 } 4007 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4008 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4009 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4010 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4011 if (pcbddc->benign_n) { 4012 Mat B0_B,B0_BPHI; 4013 PetscScalar *data,*data2; 4014 PetscInt j; 4015 4016 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4017 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4018 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4019 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4020 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4021 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4022 for (j=0;j<pcbddc->benign_n;j++) { 4023 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4024 for (i=0;i<pcbddc->local_primal_size;i++) { 4025 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4026 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4027 } 4028 } 4029 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4030 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4031 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4032 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4033 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4034 } 4035 #if 0 4036 { 4037 PetscViewer viewer; 4038 char filename[256]; 4039 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4040 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4041 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4042 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4043 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4044 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4045 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4046 if (save_change) { 4047 Mat phi_B; 4048 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 4049 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 4050 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 4051 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 4052 } else { 4053 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4054 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4055 } 4056 if (pcbddc->coarse_phi_D) { 4057 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4058 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4059 } 4060 if (pcbddc->coarse_psi_B) { 4061 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4062 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4063 } 4064 if (pcbddc->coarse_psi_D) { 4065 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4066 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4067 } 4068 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4069 } 4070 #endif 4071 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4072 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4073 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4074 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4075 4076 /* check constraints */ 4077 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4078 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4079 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4080 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4081 } else { 4082 PetscScalar *data; 4083 Mat tmat; 4084 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4085 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4086 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4087 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4088 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4089 } 4090 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4091 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4092 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4093 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4094 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4095 if (!pcbddc->symmetric_primal) { 4096 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4097 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4098 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4099 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4100 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4101 } 4102 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4103 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4104 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4105 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4106 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4107 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4108 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4109 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4110 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4111 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4112 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4113 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4114 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4115 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4116 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4117 if (!pcbddc->symmetric_primal) { 4118 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4119 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4120 } 4121 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4122 } 4123 /* get back data */ 4124 *coarse_submat_vals_n = coarse_submat_vals; 4125 PetscFunctionReturn(0); 4126 } 4127 4128 #undef __FUNCT__ 4129 #define __FUNCT__ "MatGetSubMatrixUnsorted" 4130 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4131 { 4132 Mat *work_mat; 4133 IS isrow_s,iscol_s; 4134 PetscBool rsorted,csorted; 4135 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4136 PetscErrorCode ierr; 4137 4138 PetscFunctionBegin; 4139 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4140 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4141 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4142 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4143 4144 if (!rsorted) { 4145 const PetscInt *idxs; 4146 PetscInt *idxs_sorted,i; 4147 4148 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4149 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4150 for (i=0;i<rsize;i++) { 4151 idxs_perm_r[i] = i; 4152 } 4153 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4154 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4155 for (i=0;i<rsize;i++) { 4156 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4157 } 4158 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4159 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4160 } else { 4161 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4162 isrow_s = isrow; 4163 } 4164 4165 if (!csorted) { 4166 if (isrow == iscol) { 4167 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4168 iscol_s = isrow_s; 4169 } else { 4170 const PetscInt *idxs; 4171 PetscInt *idxs_sorted,i; 4172 4173 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4174 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4175 for (i=0;i<csize;i++) { 4176 idxs_perm_c[i] = i; 4177 } 4178 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4179 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4180 for (i=0;i<csize;i++) { 4181 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4182 } 4183 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4184 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4185 } 4186 } else { 4187 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4188 iscol_s = iscol; 4189 } 4190 4191 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4192 4193 if (!rsorted || !csorted) { 4194 Mat new_mat; 4195 IS is_perm_r,is_perm_c; 4196 4197 if (!rsorted) { 4198 PetscInt *idxs_r,i; 4199 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4200 for (i=0;i<rsize;i++) { 4201 idxs_r[idxs_perm_r[i]] = i; 4202 } 4203 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4204 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4205 } else { 4206 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4207 } 4208 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4209 4210 if (!csorted) { 4211 if (isrow_s == iscol_s) { 4212 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4213 is_perm_c = is_perm_r; 4214 } else { 4215 PetscInt *idxs_c,i; 4216 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4217 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4218 for (i=0;i<csize;i++) { 4219 idxs_c[idxs_perm_c[i]] = i; 4220 } 4221 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4222 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4223 } 4224 } else { 4225 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4226 } 4227 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4228 4229 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4230 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4231 work_mat[0] = new_mat; 4232 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4233 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4234 } 4235 4236 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4237 *B = work_mat[0]; 4238 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4239 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4240 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4241 PetscFunctionReturn(0); 4242 } 4243 4244 #undef __FUNCT__ 4245 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 4246 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4247 { 4248 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4249 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4250 Mat new_mat; 4251 IS is_local,is_global; 4252 PetscInt local_size; 4253 PetscBool isseqaij; 4254 PetscErrorCode ierr; 4255 4256 PetscFunctionBegin; 4257 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4258 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4259 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4260 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4261 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4262 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4263 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4264 4265 /* check */ 4266 if (pcbddc->dbg_flag) { 4267 Vec x,x_change; 4268 PetscReal error; 4269 4270 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4271 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4272 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4273 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4274 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4275 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4276 if (!pcbddc->change_interior) { 4277 const PetscScalar *x,*y,*v; 4278 PetscReal lerror = 0.; 4279 PetscInt i; 4280 4281 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4282 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4283 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4284 for (i=0;i<local_size;i++) 4285 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4286 lerror = PetscAbsScalar(x[i]-y[i]); 4287 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4288 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4289 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4290 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4291 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on I: %1.6e\n",error);CHKERRQ(ierr); 4292 } 4293 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4294 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4295 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4296 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4297 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4298 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 4299 ierr = VecDestroy(&x);CHKERRQ(ierr); 4300 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4301 } 4302 4303 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4304 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4305 if (isseqaij) { 4306 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4307 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4308 } else { 4309 Mat work_mat; 4310 4311 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4312 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4313 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4314 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4315 } 4316 if (matis->A->symmetric_set) { 4317 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4318 #if !defined(PETSC_USE_COMPLEX) 4319 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4320 #endif 4321 } 4322 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4323 PetscFunctionReturn(0); 4324 } 4325 4326 #undef __FUNCT__ 4327 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 4328 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4329 { 4330 PC_IS* pcis = (PC_IS*)(pc->data); 4331 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4332 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4333 PetscInt *idx_R_local=NULL; 4334 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4335 PetscInt vbs,bs; 4336 PetscBT bitmask=NULL; 4337 PetscErrorCode ierr; 4338 4339 PetscFunctionBegin; 4340 /* 4341 No need to setup local scatters if 4342 - primal space is unchanged 4343 AND 4344 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4345 AND 4346 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4347 */ 4348 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4349 PetscFunctionReturn(0); 4350 } 4351 /* destroy old objects */ 4352 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4353 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4354 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4355 /* Set Non-overlapping dimensions */ 4356 n_B = pcis->n_B; 4357 n_D = pcis->n - n_B; 4358 n_vertices = pcbddc->n_vertices; 4359 4360 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4361 4362 /* create auxiliary bitmask and allocate workspace */ 4363 if (!sub_schurs || !sub_schurs->reuse_solver) { 4364 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4365 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4366 for (i=0;i<n_vertices;i++) { 4367 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4368 } 4369 4370 for (i=0, n_R=0; i<pcis->n; i++) { 4371 if (!PetscBTLookup(bitmask,i)) { 4372 idx_R_local[n_R++] = i; 4373 } 4374 } 4375 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4376 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4377 4378 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4379 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4380 } 4381 4382 /* Block code */ 4383 vbs = 1; 4384 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4385 if (bs>1 && !(n_vertices%bs)) { 4386 PetscBool is_blocked = PETSC_TRUE; 4387 PetscInt *vary; 4388 if (!sub_schurs || !sub_schurs->reuse_solver) { 4389 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4390 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4391 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4392 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4393 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4394 for (i=0; i<pcis->n/bs; i++) { 4395 if (vary[i]!=0 && vary[i]!=bs) { 4396 is_blocked = PETSC_FALSE; 4397 break; 4398 } 4399 } 4400 ierr = PetscFree(vary);CHKERRQ(ierr); 4401 } else { 4402 /* Verify directly the R set */ 4403 for (i=0; i<n_R/bs; i++) { 4404 PetscInt j,node=idx_R_local[bs*i]; 4405 for (j=1; j<bs; j++) { 4406 if (node != idx_R_local[bs*i+j]-j) { 4407 is_blocked = PETSC_FALSE; 4408 break; 4409 } 4410 } 4411 } 4412 } 4413 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4414 vbs = bs; 4415 for (i=0;i<n_R/vbs;i++) { 4416 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4417 } 4418 } 4419 } 4420 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4421 if (sub_schurs && sub_schurs->reuse_solver) { 4422 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4423 4424 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4425 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4426 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4427 reuse_solver->is_R = pcbddc->is_R_local; 4428 } else { 4429 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4430 } 4431 4432 /* print some info if requested */ 4433 if (pcbddc->dbg_flag) { 4434 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4435 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4436 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4437 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4438 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4439 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4440 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4441 } 4442 4443 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4444 if (!sub_schurs || !sub_schurs->reuse_solver) { 4445 IS is_aux1,is_aux2; 4446 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4447 4448 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4449 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4450 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4451 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4452 for (i=0; i<n_D; i++) { 4453 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4454 } 4455 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4456 for (i=0, j=0; i<n_R; i++) { 4457 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4458 aux_array1[j++] = i; 4459 } 4460 } 4461 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4462 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4463 for (i=0, j=0; i<n_B; i++) { 4464 if (!PetscBTLookup(bitmask,is_indices[i])) { 4465 aux_array2[j++] = i; 4466 } 4467 } 4468 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4469 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4470 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4471 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4472 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4473 4474 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4475 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4476 for (i=0, j=0; i<n_R; i++) { 4477 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4478 aux_array1[j++] = i; 4479 } 4480 } 4481 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4482 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4483 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4484 } 4485 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4486 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4487 } else { 4488 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4489 IS tis; 4490 PetscInt schur_size; 4491 4492 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4493 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4494 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4495 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4496 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4497 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4498 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4499 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4500 } 4501 } 4502 PetscFunctionReturn(0); 4503 } 4504 4505 4506 #undef __FUNCT__ 4507 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 4508 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4509 { 4510 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4511 PC_IS *pcis = (PC_IS*)pc->data; 4512 PC pc_temp; 4513 Mat A_RR; 4514 MatReuse reuse; 4515 PetscScalar m_one = -1.0; 4516 PetscReal value; 4517 PetscInt n_D,n_R; 4518 PetscBool check_corr[2],issbaij; 4519 PetscErrorCode ierr; 4520 /* prefixes stuff */ 4521 char dir_prefix[256],neu_prefix[256],str_level[16]; 4522 size_t len; 4523 4524 PetscFunctionBegin; 4525 4526 /* compute prefixes */ 4527 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4528 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4529 if (!pcbddc->current_level) { 4530 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4531 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4532 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4533 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4534 } else { 4535 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4536 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4537 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4538 len -= 15; /* remove "pc_bddc_coarse_" */ 4539 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4540 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4541 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4542 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4543 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4544 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4545 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4546 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4547 } 4548 4549 /* DIRICHLET PROBLEM */ 4550 if (dirichlet) { 4551 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4552 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4553 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4554 if (pcbddc->dbg_flag) { 4555 Mat A_IIn; 4556 4557 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4558 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4559 pcis->A_II = A_IIn; 4560 } 4561 } 4562 if (pcbddc->local_mat->symmetric_set) { 4563 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4564 } 4565 /* Matrix for Dirichlet problem is pcis->A_II */ 4566 n_D = pcis->n - pcis->n_B; 4567 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4568 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4569 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4570 /* default */ 4571 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4572 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4573 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4574 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4575 if (issbaij) { 4576 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4577 } else { 4578 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4579 } 4580 /* Allow user's customization */ 4581 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4582 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4583 } 4584 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4585 if (sub_schurs && sub_schurs->reuse_solver) { 4586 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4587 4588 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4589 } 4590 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4591 if (!n_D) { 4592 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4593 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4594 } 4595 /* Set Up KSP for Dirichlet problem of BDDC */ 4596 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4597 /* set ksp_D into pcis data */ 4598 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4599 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4600 pcis->ksp_D = pcbddc->ksp_D; 4601 } 4602 4603 /* NEUMANN PROBLEM */ 4604 A_RR = 0; 4605 if (neumann) { 4606 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4607 PetscInt ibs,mbs; 4608 PetscBool issbaij; 4609 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4610 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4611 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4612 if (pcbddc->ksp_R) { /* already created ksp */ 4613 PetscInt nn_R; 4614 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4615 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4616 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4617 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4618 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4619 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4620 reuse = MAT_INITIAL_MATRIX; 4621 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4622 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4623 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4624 reuse = MAT_INITIAL_MATRIX; 4625 } else { /* safe to reuse the matrix */ 4626 reuse = MAT_REUSE_MATRIX; 4627 } 4628 } 4629 /* last check */ 4630 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4631 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4632 reuse = MAT_INITIAL_MATRIX; 4633 } 4634 } else { /* first time, so we need to create the matrix */ 4635 reuse = MAT_INITIAL_MATRIX; 4636 } 4637 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4638 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4639 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4640 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4641 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4642 if (matis->A == pcbddc->local_mat) { 4643 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4644 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4645 } else { 4646 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4647 } 4648 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4649 if (matis->A == pcbddc->local_mat) { 4650 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4651 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4652 } else { 4653 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4654 } 4655 } 4656 /* extract A_RR */ 4657 if (sub_schurs && sub_schurs->reuse_solver) { 4658 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4659 4660 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4661 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4662 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4663 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4664 } else { 4665 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4666 } 4667 } else { 4668 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4669 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4670 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4671 } 4672 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4673 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4674 } 4675 if (pcbddc->local_mat->symmetric_set) { 4676 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4677 } 4678 if (!pcbddc->ksp_R) { /* create object if not present */ 4679 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4680 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4681 /* default */ 4682 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4683 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4684 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4685 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4686 if (issbaij) { 4687 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4688 } else { 4689 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4690 } 4691 /* Allow user's customization */ 4692 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4693 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4694 } 4695 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4696 if (!n_R) { 4697 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4698 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4699 } 4700 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4701 /* Reuse solver if it is present */ 4702 if (sub_schurs && sub_schurs->reuse_solver) { 4703 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4704 4705 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4706 } 4707 /* Set Up KSP for Neumann problem of BDDC */ 4708 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4709 } 4710 4711 if (pcbddc->dbg_flag) { 4712 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4713 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4714 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4715 } 4716 4717 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4718 check_corr[0] = check_corr[1] = PETSC_FALSE; 4719 if (pcbddc->NullSpace_corr[0]) { 4720 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4721 } 4722 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4723 check_corr[0] = PETSC_TRUE; 4724 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4725 } 4726 if (neumann && pcbddc->NullSpace_corr[2]) { 4727 check_corr[1] = PETSC_TRUE; 4728 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4729 } 4730 4731 /* check Dirichlet and Neumann solvers */ 4732 if (pcbddc->dbg_flag) { 4733 if (dirichlet) { /* Dirichlet */ 4734 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4735 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4736 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4737 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4738 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4739 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4740 if (check_corr[0]) { 4741 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4742 } 4743 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4744 } 4745 if (neumann) { /* Neumann */ 4746 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4747 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4748 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4749 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4750 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4751 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4752 if (check_corr[1]) { 4753 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4754 } 4755 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4756 } 4757 } 4758 /* free Neumann problem's matrix */ 4759 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4760 PetscFunctionReturn(0); 4761 } 4762 4763 #undef __FUNCT__ 4764 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 4765 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4766 { 4767 PetscErrorCode ierr; 4768 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4769 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4770 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4771 4772 PetscFunctionBegin; 4773 if (!reuse_solver) { 4774 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4775 } 4776 if (!pcbddc->switch_static) { 4777 if (applytranspose && pcbddc->local_auxmat1) { 4778 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4779 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4780 } 4781 if (!reuse_solver) { 4782 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4783 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4784 } else { 4785 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4786 4787 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4788 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4789 } 4790 } else { 4791 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4792 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4793 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4794 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4795 if (applytranspose && pcbddc->local_auxmat1) { 4796 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4797 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4798 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4799 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4800 } 4801 } 4802 if (!reuse_solver || pcbddc->switch_static) { 4803 if (applytranspose) { 4804 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4805 } else { 4806 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4807 } 4808 } else { 4809 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4810 4811 if (applytranspose) { 4812 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4813 } else { 4814 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4815 } 4816 } 4817 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4818 if (!pcbddc->switch_static) { 4819 if (!reuse_solver) { 4820 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4821 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4822 } else { 4823 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4824 4825 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4826 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4827 } 4828 if (!applytranspose && pcbddc->local_auxmat1) { 4829 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4830 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4831 } 4832 } else { 4833 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4834 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4835 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4836 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4837 if (!applytranspose && pcbddc->local_auxmat1) { 4838 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4839 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4840 } 4841 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4842 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4843 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4844 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4845 } 4846 PetscFunctionReturn(0); 4847 } 4848 4849 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 4850 #undef __FUNCT__ 4851 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 4852 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 4853 { 4854 PetscErrorCode ierr; 4855 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4856 PC_IS* pcis = (PC_IS*) (pc->data); 4857 const PetscScalar zero = 0.0; 4858 4859 PetscFunctionBegin; 4860 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 4861 if (!pcbddc->benign_apply_coarse_only) { 4862 if (applytranspose) { 4863 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4864 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4865 } else { 4866 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4867 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4868 } 4869 } else { 4870 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 4871 } 4872 4873 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 4874 if (pcbddc->benign_n) { 4875 PetscScalar *array; 4876 PetscInt j; 4877 4878 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4879 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 4880 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4881 } 4882 4883 /* start communications from local primal nodes to rhs of coarse solver */ 4884 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 4885 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4886 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4887 4888 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 4889 if (pcbddc->coarse_ksp) { 4890 Mat coarse_mat; 4891 Vec rhs,sol; 4892 MatNullSpace nullsp; 4893 PetscBool isbddc = PETSC_FALSE; 4894 4895 if (pcbddc->benign_have_null) { 4896 PC coarse_pc; 4897 4898 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4899 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4900 /* we need to propagate to coarser levels the need for a possible benign correction */ 4901 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 4902 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4903 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 4904 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 4905 } 4906 } 4907 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 4908 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 4909 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4910 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 4911 if (nullsp) { 4912 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 4913 } 4914 if (applytranspose) { 4915 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 4916 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4917 } else { 4918 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 4919 PC coarse_pc; 4920 4921 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4922 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4923 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 4924 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4925 } else { 4926 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4927 } 4928 } 4929 /* we don't need the benign correction at coarser levels anymore */ 4930 if (pcbddc->benign_have_null && isbddc) { 4931 PC coarse_pc; 4932 PC_BDDC* coarsepcbddc; 4933 4934 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4935 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4936 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 4937 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 4938 } 4939 if (nullsp) { 4940 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 4941 } 4942 } 4943 4944 /* Local solution on R nodes */ 4945 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 4946 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 4947 } 4948 /* communications from coarse sol to local primal nodes */ 4949 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4950 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4951 4952 /* Sum contributions from the two levels */ 4953 if (!pcbddc->benign_apply_coarse_only) { 4954 if (applytranspose) { 4955 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4956 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4957 } else { 4958 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4959 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4960 } 4961 /* store p0 */ 4962 if (pcbddc->benign_n) { 4963 PetscScalar *array; 4964 PetscInt j; 4965 4966 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4967 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 4968 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4969 } 4970 } else { /* expand the coarse solution */ 4971 if (applytranspose) { 4972 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4973 } else { 4974 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4975 } 4976 } 4977 PetscFunctionReturn(0); 4978 } 4979 4980 #undef __FUNCT__ 4981 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 4982 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 4983 { 4984 PetscErrorCode ierr; 4985 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4986 PetscScalar *array; 4987 Vec from,to; 4988 4989 PetscFunctionBegin; 4990 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4991 from = pcbddc->coarse_vec; 4992 to = pcbddc->vec1_P; 4993 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4994 Vec tvec; 4995 4996 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4997 ierr = VecResetArray(tvec);CHKERRQ(ierr); 4998 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4999 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5000 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5001 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5002 } 5003 } else { /* from local to global -> put data in coarse right hand side */ 5004 from = pcbddc->vec1_P; 5005 to = pcbddc->coarse_vec; 5006 } 5007 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5008 PetscFunctionReturn(0); 5009 } 5010 5011 #undef __FUNCT__ 5012 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 5013 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5014 { 5015 PetscErrorCode ierr; 5016 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5017 PetscScalar *array; 5018 Vec from,to; 5019 5020 PetscFunctionBegin; 5021 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5022 from = pcbddc->coarse_vec; 5023 to = pcbddc->vec1_P; 5024 } else { /* from local to global -> put data in coarse right hand side */ 5025 from = pcbddc->vec1_P; 5026 to = pcbddc->coarse_vec; 5027 } 5028 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5029 if (smode == SCATTER_FORWARD) { 5030 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5031 Vec tvec; 5032 5033 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5034 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5035 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5036 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5037 } 5038 } else { 5039 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5040 ierr = VecResetArray(from);CHKERRQ(ierr); 5041 } 5042 } 5043 PetscFunctionReturn(0); 5044 } 5045 5046 /* uncomment for testing purposes */ 5047 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5048 #undef __FUNCT__ 5049 #define __FUNCT__ "PCBDDCConstraintsSetUp" 5050 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5051 { 5052 PetscErrorCode ierr; 5053 PC_IS* pcis = (PC_IS*)(pc->data); 5054 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5055 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5056 /* one and zero */ 5057 PetscScalar one=1.0,zero=0.0; 5058 /* space to store constraints and their local indices */ 5059 PetscScalar *constraints_data; 5060 PetscInt *constraints_idxs,*constraints_idxs_B; 5061 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5062 PetscInt *constraints_n; 5063 /* iterators */ 5064 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5065 /* BLAS integers */ 5066 PetscBLASInt lwork,lierr; 5067 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5068 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5069 /* reuse */ 5070 PetscInt olocal_primal_size,olocal_primal_size_cc; 5071 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5072 /* change of basis */ 5073 PetscBool qr_needed; 5074 PetscBT change_basis,qr_needed_idx; 5075 /* auxiliary stuff */ 5076 PetscInt *nnz,*is_indices; 5077 PetscInt ncc; 5078 /* some quantities */ 5079 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5080 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5081 5082 PetscFunctionBegin; 5083 /* Destroy Mat objects computed previously */ 5084 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5085 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5086 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5087 /* save info on constraints from previous setup (if any) */ 5088 olocal_primal_size = pcbddc->local_primal_size; 5089 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5090 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5091 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5092 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5093 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5094 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5095 5096 if (!pcbddc->adaptive_selection) { 5097 IS ISForVertices,*ISForFaces,*ISForEdges; 5098 MatNullSpace nearnullsp; 5099 const Vec *nearnullvecs; 5100 Vec *localnearnullsp; 5101 PetscScalar *array; 5102 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5103 PetscBool nnsp_has_cnst; 5104 /* LAPACK working arrays for SVD or POD */ 5105 PetscBool skip_lapack,boolforchange; 5106 PetscScalar *work; 5107 PetscReal *singular_vals; 5108 #if defined(PETSC_USE_COMPLEX) 5109 PetscReal *rwork; 5110 #endif 5111 #if defined(PETSC_MISSING_LAPACK_GESVD) 5112 PetscScalar *temp_basis,*correlation_mat; 5113 #else 5114 PetscBLASInt dummy_int=1; 5115 PetscScalar dummy_scalar=1.; 5116 #endif 5117 5118 /* Get index sets for faces, edges and vertices from graph */ 5119 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5120 /* print some info */ 5121 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5122 PetscInt nv; 5123 5124 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5125 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5126 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5127 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5128 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5129 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5130 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5131 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5132 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5133 } 5134 5135 /* free unneeded index sets */ 5136 if (!pcbddc->use_vertices) { 5137 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5138 } 5139 if (!pcbddc->use_edges) { 5140 for (i=0;i<n_ISForEdges;i++) { 5141 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5142 } 5143 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5144 n_ISForEdges = 0; 5145 } 5146 if (!pcbddc->use_faces) { 5147 for (i=0;i<n_ISForFaces;i++) { 5148 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5149 } 5150 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5151 n_ISForFaces = 0; 5152 } 5153 5154 /* check if near null space is attached to global mat */ 5155 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5156 if (nearnullsp) { 5157 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5158 /* remove any stored info */ 5159 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5160 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5161 /* store information for BDDC solver reuse */ 5162 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5163 pcbddc->onearnullspace = nearnullsp; 5164 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5165 for (i=0;i<nnsp_size;i++) { 5166 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5167 } 5168 } else { /* if near null space is not provided BDDC uses constants by default */ 5169 nnsp_size = 0; 5170 nnsp_has_cnst = PETSC_TRUE; 5171 } 5172 /* get max number of constraints on a single cc */ 5173 max_constraints = nnsp_size; 5174 if (nnsp_has_cnst) max_constraints++; 5175 5176 /* 5177 Evaluate maximum storage size needed by the procedure 5178 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5179 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5180 There can be multiple constraints per connected component 5181 */ 5182 n_vertices = 0; 5183 if (ISForVertices) { 5184 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5185 } 5186 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5187 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5188 5189 total_counts = n_ISForFaces+n_ISForEdges; 5190 total_counts *= max_constraints; 5191 total_counts += n_vertices; 5192 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5193 5194 total_counts = 0; 5195 max_size_of_constraint = 0; 5196 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5197 IS used_is; 5198 if (i<n_ISForEdges) { 5199 used_is = ISForEdges[i]; 5200 } else { 5201 used_is = ISForFaces[i-n_ISForEdges]; 5202 } 5203 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5204 total_counts += j; 5205 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5206 } 5207 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5208 5209 /* get local part of global near null space vectors */ 5210 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5211 for (k=0;k<nnsp_size;k++) { 5212 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5213 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5214 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5215 } 5216 5217 /* whether or not to skip lapack calls */ 5218 skip_lapack = PETSC_TRUE; 5219 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5220 5221 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5222 if (!skip_lapack) { 5223 PetscScalar temp_work; 5224 5225 #if defined(PETSC_MISSING_LAPACK_GESVD) 5226 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5227 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5228 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5229 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5230 #if defined(PETSC_USE_COMPLEX) 5231 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5232 #endif 5233 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5234 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5235 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5236 lwork = -1; 5237 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5238 #if !defined(PETSC_USE_COMPLEX) 5239 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5240 #else 5241 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5242 #endif 5243 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5244 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5245 #else /* on missing GESVD */ 5246 /* SVD */ 5247 PetscInt max_n,min_n; 5248 max_n = max_size_of_constraint; 5249 min_n = max_constraints; 5250 if (max_size_of_constraint < max_constraints) { 5251 min_n = max_size_of_constraint; 5252 max_n = max_constraints; 5253 } 5254 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5255 #if defined(PETSC_USE_COMPLEX) 5256 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5257 #endif 5258 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5259 lwork = -1; 5260 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5261 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5262 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5263 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5264 #if !defined(PETSC_USE_COMPLEX) 5265 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5266 #else 5267 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5268 #endif 5269 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5270 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5271 #endif /* on missing GESVD */ 5272 /* Allocate optimal workspace */ 5273 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5274 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5275 } 5276 /* Now we can loop on constraining sets */ 5277 total_counts = 0; 5278 constraints_idxs_ptr[0] = 0; 5279 constraints_data_ptr[0] = 0; 5280 /* vertices */ 5281 if (n_vertices) { 5282 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5283 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5284 for (i=0;i<n_vertices;i++) { 5285 constraints_n[total_counts] = 1; 5286 constraints_data[total_counts] = 1.0; 5287 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5288 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5289 total_counts++; 5290 } 5291 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5292 n_vertices = total_counts; 5293 } 5294 5295 /* edges and faces */ 5296 total_counts_cc = total_counts; 5297 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5298 IS used_is; 5299 PetscBool idxs_copied = PETSC_FALSE; 5300 5301 if (ncc<n_ISForEdges) { 5302 used_is = ISForEdges[ncc]; 5303 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5304 } else { 5305 used_is = ISForFaces[ncc-n_ISForEdges]; 5306 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5307 } 5308 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5309 5310 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5311 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5312 /* change of basis should not be performed on local periodic nodes */ 5313 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5314 if (nnsp_has_cnst) { 5315 PetscScalar quad_value; 5316 5317 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5318 idxs_copied = PETSC_TRUE; 5319 5320 if (!pcbddc->use_nnsp_true) { 5321 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5322 } else { 5323 quad_value = 1.0; 5324 } 5325 for (j=0;j<size_of_constraint;j++) { 5326 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5327 } 5328 temp_constraints++; 5329 total_counts++; 5330 } 5331 for (k=0;k<nnsp_size;k++) { 5332 PetscReal real_value; 5333 PetscScalar *ptr_to_data; 5334 5335 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5336 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5337 for (j=0;j<size_of_constraint;j++) { 5338 ptr_to_data[j] = array[is_indices[j]]; 5339 } 5340 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5341 /* check if array is null on the connected component */ 5342 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5343 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5344 if (real_value > 0.0) { /* keep indices and values */ 5345 temp_constraints++; 5346 total_counts++; 5347 if (!idxs_copied) { 5348 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5349 idxs_copied = PETSC_TRUE; 5350 } 5351 } 5352 } 5353 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5354 valid_constraints = temp_constraints; 5355 if (!pcbddc->use_nnsp_true && temp_constraints) { 5356 if (temp_constraints == 1) { /* just normalize the constraint */ 5357 PetscScalar norm,*ptr_to_data; 5358 5359 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5360 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5361 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5362 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5363 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5364 } else { /* perform SVD */ 5365 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5366 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5367 5368 #if defined(PETSC_MISSING_LAPACK_GESVD) 5369 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5370 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5371 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5372 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5373 from that computed using LAPACKgesvd 5374 -> This is due to a different computation of eigenvectors in LAPACKheev 5375 -> The quality of the POD-computed basis will be the same */ 5376 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5377 /* Store upper triangular part of correlation matrix */ 5378 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5379 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5380 for (j=0;j<temp_constraints;j++) { 5381 for (k=0;k<j+1;k++) { 5382 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5383 } 5384 } 5385 /* compute eigenvalues and eigenvectors of correlation matrix */ 5386 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5387 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5388 #if !defined(PETSC_USE_COMPLEX) 5389 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5390 #else 5391 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5392 #endif 5393 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5394 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5395 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5396 j = 0; 5397 while (j < temp_constraints && singular_vals[j] < tol) j++; 5398 total_counts = total_counts-j; 5399 valid_constraints = temp_constraints-j; 5400 /* scale and copy POD basis into used quadrature memory */ 5401 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5402 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5403 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5404 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5405 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5406 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5407 if (j<temp_constraints) { 5408 PetscInt ii; 5409 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5410 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5411 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5412 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5413 for (k=0;k<temp_constraints-j;k++) { 5414 for (ii=0;ii<size_of_constraint;ii++) { 5415 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5416 } 5417 } 5418 } 5419 #else /* on missing GESVD */ 5420 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5421 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5422 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5423 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5424 #if !defined(PETSC_USE_COMPLEX) 5425 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5426 #else 5427 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5428 #endif 5429 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5430 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5431 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5432 k = temp_constraints; 5433 if (k > size_of_constraint) k = size_of_constraint; 5434 j = 0; 5435 while (j < k && singular_vals[k-j-1] < tol) j++; 5436 valid_constraints = k-j; 5437 total_counts = total_counts-temp_constraints+valid_constraints; 5438 #endif /* on missing GESVD */ 5439 } 5440 } 5441 /* update pointers information */ 5442 if (valid_constraints) { 5443 constraints_n[total_counts_cc] = valid_constraints; 5444 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5445 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5446 /* set change_of_basis flag */ 5447 if (boolforchange) { 5448 PetscBTSet(change_basis,total_counts_cc); 5449 } 5450 total_counts_cc++; 5451 } 5452 } 5453 /* free workspace */ 5454 if (!skip_lapack) { 5455 ierr = PetscFree(work);CHKERRQ(ierr); 5456 #if defined(PETSC_USE_COMPLEX) 5457 ierr = PetscFree(rwork);CHKERRQ(ierr); 5458 #endif 5459 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5460 #if defined(PETSC_MISSING_LAPACK_GESVD) 5461 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5462 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5463 #endif 5464 } 5465 for (k=0;k<nnsp_size;k++) { 5466 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5467 } 5468 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5469 /* free index sets of faces, edges and vertices */ 5470 for (i=0;i<n_ISForFaces;i++) { 5471 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5472 } 5473 if (n_ISForFaces) { 5474 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5475 } 5476 for (i=0;i<n_ISForEdges;i++) { 5477 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5478 } 5479 if (n_ISForEdges) { 5480 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5481 } 5482 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5483 } else { 5484 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5485 5486 total_counts = 0; 5487 n_vertices = 0; 5488 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5489 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5490 } 5491 max_constraints = 0; 5492 total_counts_cc = 0; 5493 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5494 total_counts += pcbddc->adaptive_constraints_n[i]; 5495 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5496 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5497 } 5498 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5499 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5500 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5501 constraints_data = pcbddc->adaptive_constraints_data; 5502 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5503 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5504 total_counts_cc = 0; 5505 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5506 if (pcbddc->adaptive_constraints_n[i]) { 5507 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5508 } 5509 } 5510 #if 0 5511 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5512 for (i=0;i<total_counts_cc;i++) { 5513 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5514 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5515 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5516 printf(" %d",constraints_idxs[j]); 5517 } 5518 printf("\n"); 5519 printf("number of cc: %d\n",constraints_n[i]); 5520 } 5521 for (i=0;i<n_vertices;i++) { 5522 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5523 } 5524 for (i=0;i<sub_schurs->n_subs;i++) { 5525 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5526 } 5527 #endif 5528 5529 max_size_of_constraint = 0; 5530 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5531 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5532 /* Change of basis */ 5533 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5534 if (pcbddc->use_change_of_basis) { 5535 for (i=0;i<sub_schurs->n_subs;i++) { 5536 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5537 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5538 } 5539 } 5540 } 5541 } 5542 pcbddc->local_primal_size = total_counts; 5543 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5544 5545 /* map constraints_idxs in boundary numbering */ 5546 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5547 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5548 5549 /* Create constraint matrix */ 5550 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5551 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5552 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5553 5554 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5555 /* determine if a QR strategy is needed for change of basis */ 5556 qr_needed = PETSC_FALSE; 5557 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5558 total_primal_vertices=0; 5559 pcbddc->local_primal_size_cc = 0; 5560 for (i=0;i<total_counts_cc;i++) { 5561 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5562 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5563 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5564 pcbddc->local_primal_size_cc += 1; 5565 } else if (PetscBTLookup(change_basis,i)) { 5566 for (k=0;k<constraints_n[i];k++) { 5567 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5568 } 5569 pcbddc->local_primal_size_cc += constraints_n[i]; 5570 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5571 PetscBTSet(qr_needed_idx,i); 5572 qr_needed = PETSC_TRUE; 5573 } 5574 } else { 5575 pcbddc->local_primal_size_cc += 1; 5576 } 5577 } 5578 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5579 pcbddc->n_vertices = total_primal_vertices; 5580 /* permute indices in order to have a sorted set of vertices */ 5581 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5582 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5583 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5584 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5585 5586 /* nonzero structure of constraint matrix */ 5587 /* and get reference dof for local constraints */ 5588 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5589 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5590 5591 j = total_primal_vertices; 5592 total_counts = total_primal_vertices; 5593 cum = total_primal_vertices; 5594 for (i=n_vertices;i<total_counts_cc;i++) { 5595 if (!PetscBTLookup(change_basis,i)) { 5596 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5597 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5598 cum++; 5599 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5600 for (k=0;k<constraints_n[i];k++) { 5601 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5602 nnz[j+k] = size_of_constraint; 5603 } 5604 j += constraints_n[i]; 5605 } 5606 } 5607 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5608 ierr = PetscFree(nnz);CHKERRQ(ierr); 5609 5610 /* set values in constraint matrix */ 5611 for (i=0;i<total_primal_vertices;i++) { 5612 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5613 } 5614 total_counts = total_primal_vertices; 5615 for (i=n_vertices;i<total_counts_cc;i++) { 5616 if (!PetscBTLookup(change_basis,i)) { 5617 PetscInt *cols; 5618 5619 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5620 cols = constraints_idxs+constraints_idxs_ptr[i]; 5621 for (k=0;k<constraints_n[i];k++) { 5622 PetscInt row = total_counts+k; 5623 PetscScalar *vals; 5624 5625 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5626 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5627 } 5628 total_counts += constraints_n[i]; 5629 } 5630 } 5631 /* assembling */ 5632 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5633 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5634 5635 /* 5636 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5637 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5638 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5639 */ 5640 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5641 if (pcbddc->use_change_of_basis) { 5642 /* dual and primal dofs on a single cc */ 5643 PetscInt dual_dofs,primal_dofs; 5644 /* working stuff for GEQRF */ 5645 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5646 PetscBLASInt lqr_work; 5647 /* working stuff for UNGQR */ 5648 PetscScalar *gqr_work,lgqr_work_t; 5649 PetscBLASInt lgqr_work; 5650 /* working stuff for TRTRS */ 5651 PetscScalar *trs_rhs; 5652 PetscBLASInt Blas_NRHS; 5653 /* pointers for values insertion into change of basis matrix */ 5654 PetscInt *start_rows,*start_cols; 5655 PetscScalar *start_vals; 5656 /* working stuff for values insertion */ 5657 PetscBT is_primal; 5658 PetscInt *aux_primal_numbering_B; 5659 /* matrix sizes */ 5660 PetscInt global_size,local_size; 5661 /* temporary change of basis */ 5662 Mat localChangeOfBasisMatrix; 5663 /* extra space for debugging */ 5664 PetscScalar *dbg_work; 5665 5666 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5667 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5668 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5669 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5670 /* nonzeros for local mat */ 5671 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5672 if (!pcbddc->benign_change || pcbddc->fake_change) { 5673 for (i=0;i<pcis->n;i++) nnz[i]=1; 5674 } else { 5675 const PetscInt *ii; 5676 PetscInt n; 5677 PetscBool flg_row; 5678 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5679 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5680 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5681 } 5682 for (i=n_vertices;i<total_counts_cc;i++) { 5683 if (PetscBTLookup(change_basis,i)) { 5684 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5685 if (PetscBTLookup(qr_needed_idx,i)) { 5686 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5687 } else { 5688 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5689 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5690 } 5691 } 5692 } 5693 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5694 ierr = PetscFree(nnz);CHKERRQ(ierr); 5695 /* Set interior change in the matrix */ 5696 if (!pcbddc->benign_change || pcbddc->fake_change) { 5697 for (i=0;i<pcis->n;i++) { 5698 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5699 } 5700 } else { 5701 const PetscInt *ii,*jj; 5702 PetscScalar *aa; 5703 PetscInt n; 5704 PetscBool flg_row; 5705 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5706 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5707 for (i=0;i<n;i++) { 5708 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5709 } 5710 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5711 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5712 } 5713 5714 if (pcbddc->dbg_flag) { 5715 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5716 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5717 } 5718 5719 5720 /* Now we loop on the constraints which need a change of basis */ 5721 /* 5722 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5723 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5724 5725 Basic blocks of change of basis matrix T computed by 5726 5727 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5728 5729 | 1 0 ... 0 s_1/S | 5730 | 0 1 ... 0 s_2/S | 5731 | ... | 5732 | 0 ... 1 s_{n-1}/S | 5733 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5734 5735 with S = \sum_{i=1}^n s_i^2 5736 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5737 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5738 5739 - QR decomposition of constraints otherwise 5740 */ 5741 if (qr_needed) { 5742 /* space to store Q */ 5743 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5744 /* array to store scaling factors for reflectors */ 5745 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5746 /* first we issue queries for optimal work */ 5747 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5748 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5749 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5750 lqr_work = -1; 5751 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5752 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5753 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5754 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5755 lgqr_work = -1; 5756 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5757 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5758 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5759 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5760 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5761 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5762 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5763 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5764 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5765 /* array to store rhs and solution of triangular solver */ 5766 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5767 /* allocating workspace for check */ 5768 if (pcbddc->dbg_flag) { 5769 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5770 } 5771 } 5772 /* array to store whether a node is primal or not */ 5773 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5774 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5775 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5776 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5777 for (i=0;i<total_primal_vertices;i++) { 5778 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5779 } 5780 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5781 5782 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5783 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5784 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5785 if (PetscBTLookup(change_basis,total_counts)) { 5786 /* get constraint info */ 5787 primal_dofs = constraints_n[total_counts]; 5788 dual_dofs = size_of_constraint-primal_dofs; 5789 5790 if (pcbddc->dbg_flag) { 5791 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5792 } 5793 5794 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5795 5796 /* copy quadrature constraints for change of basis check */ 5797 if (pcbddc->dbg_flag) { 5798 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5799 } 5800 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5801 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5802 5803 /* compute QR decomposition of constraints */ 5804 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5805 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5806 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5807 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5808 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5809 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5810 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5811 5812 /* explictly compute R^-T */ 5813 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5814 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5815 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5816 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5817 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5818 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5819 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5820 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5821 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5822 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5823 5824 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5825 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5826 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5827 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5828 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5829 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5830 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 5831 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 5832 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5833 5834 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 5835 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 5836 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 5837 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5838 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5839 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5840 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5841 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5842 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5843 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5844 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 5845 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5846 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5847 5848 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 5849 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 5850 /* insert cols for primal dofs */ 5851 for (j=0;j<primal_dofs;j++) { 5852 start_vals = &qr_basis[j*size_of_constraint]; 5853 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5854 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5855 } 5856 /* insert cols for dual dofs */ 5857 for (j=0,k=0;j<dual_dofs;k++) { 5858 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 5859 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 5860 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5861 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5862 j++; 5863 } 5864 } 5865 5866 /* check change of basis */ 5867 if (pcbddc->dbg_flag) { 5868 PetscInt ii,jj; 5869 PetscBool valid_qr=PETSC_TRUE; 5870 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 5871 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5872 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 5873 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5874 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 5875 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 5876 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5877 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 5878 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5879 for (jj=0;jj<size_of_constraint;jj++) { 5880 for (ii=0;ii<primal_dofs;ii++) { 5881 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 5882 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 5883 } 5884 } 5885 if (!valid_qr) { 5886 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 5887 for (jj=0;jj<size_of_constraint;jj++) { 5888 for (ii=0;ii<primal_dofs;ii++) { 5889 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 5890 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5891 } 5892 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 5893 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5894 } 5895 } 5896 } 5897 } else { 5898 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 5899 } 5900 } 5901 } else { /* simple transformation block */ 5902 PetscInt row,col; 5903 PetscScalar val,norm; 5904 5905 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5906 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 5907 for (j=0;j<size_of_constraint;j++) { 5908 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 5909 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5910 if (!PetscBTLookup(is_primal,row_B)) { 5911 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 5912 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 5913 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 5914 } else { 5915 for (k=0;k<size_of_constraint;k++) { 5916 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5917 if (row != col) { 5918 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 5919 } else { 5920 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 5921 } 5922 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 5923 } 5924 } 5925 } 5926 if (pcbddc->dbg_flag) { 5927 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 5928 } 5929 } 5930 } else { 5931 if (pcbddc->dbg_flag) { 5932 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 5933 } 5934 } 5935 } 5936 5937 /* free workspace */ 5938 if (qr_needed) { 5939 if (pcbddc->dbg_flag) { 5940 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 5941 } 5942 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 5943 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 5944 ierr = PetscFree(qr_work);CHKERRQ(ierr); 5945 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 5946 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 5947 } 5948 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 5949 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5950 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5951 5952 /* assembling of global change of variable */ 5953 if (!pcbddc->fake_change) { 5954 Mat tmat; 5955 PetscInt bs; 5956 5957 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 5958 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 5959 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5960 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 5961 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5962 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5963 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 5964 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 5965 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 5966 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 5967 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5968 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5969 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5970 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5971 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5972 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5973 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5974 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 5975 5976 /* check */ 5977 if (pcbddc->dbg_flag) { 5978 PetscReal error; 5979 Vec x,x_change; 5980 5981 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 5982 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 5983 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 5984 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 5985 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5986 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5987 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 5988 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5989 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5990 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 5991 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 5992 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 5993 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5994 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 5995 ierr = VecDestroy(&x);CHKERRQ(ierr); 5996 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5997 } 5998 /* adapt sub_schurs computed (if any) */ 5999 if (pcbddc->use_deluxe_scaling) { 6000 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6001 6002 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 6003 if (sub_schurs && sub_schurs->S_Ej_all) { 6004 Mat S_new,tmat; 6005 IS is_all_N,is_V_Sall = NULL; 6006 6007 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6008 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6009 if (pcbddc->deluxe_zerorows) { 6010 ISLocalToGlobalMapping NtoSall; 6011 IS is_V; 6012 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6013 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6014 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6015 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6016 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6017 } 6018 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6019 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6020 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6021 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6022 if (pcbddc->deluxe_zerorows) { 6023 const PetscScalar *array; 6024 const PetscInt *idxs_V,*idxs_all; 6025 PetscInt i,n_V; 6026 6027 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6028 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6029 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6030 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6031 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6032 for (i=0;i<n_V;i++) { 6033 PetscScalar val; 6034 PetscInt idx; 6035 6036 idx = idxs_V[i]; 6037 val = array[idxs_all[idxs_V[i]]]; 6038 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6039 } 6040 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6041 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6042 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6043 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6044 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6045 } 6046 sub_schurs->S_Ej_all = S_new; 6047 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6048 if (sub_schurs->sum_S_Ej_all) { 6049 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6050 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6051 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6052 if (pcbddc->deluxe_zerorows) { 6053 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6054 } 6055 sub_schurs->sum_S_Ej_all = S_new; 6056 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6057 } 6058 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6059 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6060 } 6061 /* destroy any change of basis context in sub_schurs */ 6062 if (sub_schurs && sub_schurs->change) { 6063 PetscInt i; 6064 6065 for (i=0;i<sub_schurs->n_subs;i++) { 6066 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6067 } 6068 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6069 } 6070 } 6071 if (pcbddc->switch_static) { /* need to save the local change */ 6072 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6073 } else { 6074 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6075 } 6076 /* determine if any process has changed the pressures locally */ 6077 pcbddc->change_interior = pcbddc->benign_have_null; 6078 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6079 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6080 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6081 pcbddc->use_qr_single = qr_needed; 6082 } 6083 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6084 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6085 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6086 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6087 } else { 6088 Mat benign_global = NULL; 6089 if (pcbddc->benign_have_null) { 6090 Mat tmat; 6091 6092 pcbddc->change_interior = PETSC_TRUE; 6093 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6094 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6095 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6096 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6097 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6098 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6099 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6100 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6101 if (pcbddc->benign_change) { 6102 Mat M; 6103 6104 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6105 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6106 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6107 ierr = MatDestroy(&M);CHKERRQ(ierr); 6108 } else { 6109 Mat eye; 6110 PetscScalar *array; 6111 6112 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6113 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6114 for (i=0;i<pcis->n;i++) { 6115 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6116 } 6117 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6118 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6119 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6120 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6121 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6122 } 6123 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6124 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6125 } 6126 if (pcbddc->user_ChangeOfBasisMatrix) { 6127 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6128 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6129 } else if (pcbddc->benign_have_null) { 6130 pcbddc->ChangeOfBasisMatrix = benign_global; 6131 } 6132 } 6133 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6134 IS is_global; 6135 const PetscInt *gidxs; 6136 6137 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6138 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6139 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6140 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6141 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6142 } 6143 } 6144 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6145 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6146 } 6147 6148 if (!pcbddc->fake_change) { 6149 /* add pressure dofs to set of primal nodes for numbering purposes */ 6150 for (i=0;i<pcbddc->benign_n;i++) { 6151 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6152 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6153 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6154 pcbddc->local_primal_size_cc++; 6155 pcbddc->local_primal_size++; 6156 } 6157 6158 /* check if a new primal space has been introduced (also take into account benign trick) */ 6159 pcbddc->new_primal_space_local = PETSC_TRUE; 6160 if (olocal_primal_size == pcbddc->local_primal_size) { 6161 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6162 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6163 if (!pcbddc->new_primal_space_local) { 6164 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6165 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6166 } 6167 } 6168 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6169 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6170 } 6171 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6172 6173 /* flush dbg viewer */ 6174 if (pcbddc->dbg_flag) { 6175 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6176 } 6177 6178 /* free workspace */ 6179 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6180 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6181 if (!pcbddc->adaptive_selection) { 6182 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6183 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6184 } else { 6185 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6186 pcbddc->adaptive_constraints_idxs_ptr, 6187 pcbddc->adaptive_constraints_data_ptr, 6188 pcbddc->adaptive_constraints_idxs, 6189 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6190 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6191 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6192 } 6193 PetscFunctionReturn(0); 6194 } 6195 6196 #undef __FUNCT__ 6197 #define __FUNCT__ "PCBDDCAnalyzeInterface" 6198 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6199 { 6200 ISLocalToGlobalMapping map; 6201 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6202 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6203 PetscInt ierr,i,N; 6204 6205 PetscFunctionBegin; 6206 if (pcbddc->recompute_topography) { 6207 pcbddc->graphanalyzed = PETSC_FALSE; 6208 /* Reset previously computed graph */ 6209 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6210 /* Init local Graph struct */ 6211 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6212 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6213 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6214 6215 /* Check validity of the csr graph passed in by the user */ 6216 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6217 6218 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6219 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 6220 PetscInt *xadj,*adjncy; 6221 PetscInt nvtxs; 6222 PetscBool flg_row=PETSC_FALSE; 6223 6224 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6225 if (flg_row) { 6226 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6227 pcbddc->computed_rowadj = PETSC_TRUE; 6228 } 6229 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6230 } 6231 if (pcbddc->dbg_flag) { 6232 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6233 } 6234 6235 /* Setup of Graph */ 6236 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6237 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6238 6239 /* attach info on disconnected subdomains if present */ 6240 if (pcbddc->n_local_subs) { 6241 PetscInt *local_subs; 6242 6243 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6244 for (i=0;i<pcbddc->n_local_subs;i++) { 6245 const PetscInt *idxs; 6246 PetscInt nl,j; 6247 6248 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6249 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6250 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6251 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6252 } 6253 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6254 pcbddc->mat_graph->local_subs = local_subs; 6255 } 6256 } 6257 6258 if (!pcbddc->graphanalyzed) { 6259 /* Graph's connected components analysis */ 6260 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6261 pcbddc->graphanalyzed = PETSC_TRUE; 6262 } 6263 PetscFunctionReturn(0); 6264 } 6265 6266 #undef __FUNCT__ 6267 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 6268 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6269 { 6270 PetscInt i,j; 6271 PetscScalar *alphas; 6272 PetscErrorCode ierr; 6273 6274 PetscFunctionBegin; 6275 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6276 for (i=0;i<n;i++) { 6277 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6278 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6279 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6280 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6281 } 6282 ierr = PetscFree(alphas);CHKERRQ(ierr); 6283 PetscFunctionReturn(0); 6284 } 6285 6286 #undef __FUNCT__ 6287 #define __FUNCT__ "MatISGetSubassemblingPattern" 6288 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6289 { 6290 Mat A; 6291 PetscInt n_neighs,*neighs,*n_shared,**shared; 6292 PetscMPIInt size,rank,color; 6293 PetscInt *xadj,*adjncy; 6294 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6295 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 6296 PetscInt void_procs,*procs_candidates = NULL; 6297 PetscInt xadj_count, *count; 6298 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6299 PetscSubcomm psubcomm; 6300 MPI_Comm subcomm; 6301 PetscErrorCode ierr; 6302 6303 PetscFunctionBegin; 6304 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6305 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6306 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6307 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6308 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6309 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6310 6311 if (have_void) *have_void = PETSC_FALSE; 6312 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6313 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6314 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6315 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6316 im_active = !!(n); 6317 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6318 void_procs = size - active_procs; 6319 /* get ranks of of non-active processes in mat communicator */ 6320 if (void_procs) { 6321 PetscInt ncand; 6322 6323 if (have_void) *have_void = PETSC_TRUE; 6324 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6325 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6326 for (i=0,ncand=0;i<size;i++) { 6327 if (!procs_candidates[i]) { 6328 procs_candidates[ncand++] = i; 6329 } 6330 } 6331 /* force n_subdomains to be not greater that the number of non-active processes */ 6332 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6333 } 6334 6335 /* number of subdomains requested greater than active processes -> just shift the matrix 6336 number of subdomains requested 1 -> send to master or first candidate in voids */ 6337 if (active_procs < *n_subdomains || *n_subdomains == 1) { 6338 PetscInt issize,isidx,dest; 6339 if (*n_subdomains == 1) dest = 0; 6340 else dest = rank; 6341 if (im_active) { 6342 issize = 1; 6343 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6344 isidx = procs_candidates[dest]; 6345 } else { 6346 isidx = dest; 6347 } 6348 } else { 6349 issize = 0; 6350 isidx = -1; 6351 } 6352 *n_subdomains = active_procs; 6353 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6354 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6355 PetscFunctionReturn(0); 6356 } 6357 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6358 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6359 threshold = PetscMax(threshold,2); 6360 6361 /* Get info on mapping */ 6362 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 6363 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6364 6365 /* build local CSR graph of subdomains' connectivity */ 6366 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6367 xadj[0] = 0; 6368 xadj[1] = PetscMax(n_neighs-1,0); 6369 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6370 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6371 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 6372 for (i=1;i<n_neighs;i++) 6373 for (j=0;j<n_shared[i];j++) 6374 count[shared[i][j]] += 1; 6375 6376 xadj_count = 0; 6377 for (i=1;i<n_neighs;i++) { 6378 for (j=0;j<n_shared[i];j++) { 6379 if (count[shared[i][j]] < threshold) { 6380 adjncy[xadj_count] = neighs[i]; 6381 adjncy_wgt[xadj_count] = n_shared[i]; 6382 xadj_count++; 6383 break; 6384 } 6385 } 6386 } 6387 xadj[1] = xadj_count; 6388 ierr = PetscFree(count);CHKERRQ(ierr); 6389 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6390 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6391 6392 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6393 6394 /* Restrict work on active processes only */ 6395 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6396 if (void_procs) { 6397 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6398 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6399 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6400 subcomm = PetscSubcommChild(psubcomm); 6401 } else { 6402 psubcomm = NULL; 6403 subcomm = PetscObjectComm((PetscObject)mat); 6404 } 6405 6406 v_wgt = NULL; 6407 if (!color) { 6408 ierr = PetscFree(xadj);CHKERRQ(ierr); 6409 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6410 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6411 } else { 6412 Mat subdomain_adj; 6413 IS new_ranks,new_ranks_contig; 6414 MatPartitioning partitioner; 6415 PetscInt rstart=0,rend=0; 6416 PetscInt *is_indices,*oldranks; 6417 PetscMPIInt size; 6418 PetscBool aggregate; 6419 6420 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6421 if (void_procs) { 6422 PetscInt prank = rank; 6423 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6424 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6425 for (i=0;i<xadj[1];i++) { 6426 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6427 } 6428 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6429 } else { 6430 oldranks = NULL; 6431 } 6432 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6433 if (aggregate) { /* TODO: all this part could be made more efficient */ 6434 PetscInt lrows,row,ncols,*cols; 6435 PetscMPIInt nrank; 6436 PetscScalar *vals; 6437 6438 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6439 lrows = 0; 6440 if (nrank<redprocs) { 6441 lrows = size/redprocs; 6442 if (nrank<size%redprocs) lrows++; 6443 } 6444 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6445 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6446 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6447 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6448 row = nrank; 6449 ncols = xadj[1]-xadj[0]; 6450 cols = adjncy; 6451 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6452 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6453 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6454 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6455 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6456 ierr = PetscFree(xadj);CHKERRQ(ierr); 6457 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6458 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6459 ierr = PetscFree(vals);CHKERRQ(ierr); 6460 if (use_vwgt) { 6461 Vec v; 6462 const PetscScalar *array; 6463 PetscInt nl; 6464 6465 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6466 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 6467 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6468 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6469 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6470 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6471 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6472 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6473 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6474 ierr = VecDestroy(&v);CHKERRQ(ierr); 6475 } 6476 } else { 6477 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6478 if (use_vwgt) { 6479 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6480 v_wgt[0] = local_size; 6481 } 6482 } 6483 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6484 6485 /* Partition */ 6486 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6487 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6488 if (v_wgt) { 6489 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6490 } 6491 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6492 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6493 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6494 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6495 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6496 6497 /* renumber new_ranks to avoid "holes" in new set of processors */ 6498 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6499 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6500 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6501 if (!aggregate) { 6502 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6503 #if defined(PETSC_USE_DEBUG) 6504 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6505 #endif 6506 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6507 } else if (oldranks) { 6508 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6509 } else { 6510 ranks_send_to_idx[0] = is_indices[0]; 6511 } 6512 } else { 6513 PetscInt idxs[1]; 6514 PetscMPIInt tag; 6515 MPI_Request *reqs; 6516 6517 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6518 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6519 for (i=rstart;i<rend;i++) { 6520 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6521 } 6522 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6523 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6524 ierr = PetscFree(reqs);CHKERRQ(ierr); 6525 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6526 #if defined(PETSC_USE_DEBUG) 6527 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6528 #endif 6529 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6530 } else if (oldranks) { 6531 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6532 } else { 6533 ranks_send_to_idx[0] = idxs[0]; 6534 } 6535 } 6536 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6537 /* clean up */ 6538 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6539 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6540 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6541 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6542 } 6543 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6544 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6545 6546 /* assemble parallel IS for sends */ 6547 i = 1; 6548 if (!color) i=0; 6549 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6550 PetscFunctionReturn(0); 6551 } 6552 6553 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6554 6555 #undef __FUNCT__ 6556 #define __FUNCT__ "PCBDDCMatISSubassemble" 6557 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6558 { 6559 Mat local_mat; 6560 IS is_sends_internal; 6561 PetscInt rows,cols,new_local_rows; 6562 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6563 PetscBool ismatis,isdense,newisdense,destroy_mat; 6564 ISLocalToGlobalMapping l2gmap; 6565 PetscInt* l2gmap_indices; 6566 const PetscInt* is_indices; 6567 MatType new_local_type; 6568 /* buffers */ 6569 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6570 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6571 PetscInt *recv_buffer_idxs_local; 6572 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6573 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6574 /* MPI */ 6575 MPI_Comm comm,comm_n; 6576 PetscSubcomm subcomm; 6577 PetscMPIInt n_sends,n_recvs,commsize; 6578 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6579 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6580 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6581 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6582 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6583 PetscErrorCode ierr; 6584 6585 PetscFunctionBegin; 6586 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6587 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6588 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 6589 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6590 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6591 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6592 PetscValidLogicalCollectiveBool(mat,reuse,6); 6593 PetscValidLogicalCollectiveInt(mat,nis,8); 6594 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6595 if (nvecs) { 6596 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6597 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6598 } 6599 /* further checks */ 6600 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6601 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6602 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6603 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6604 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6605 if (reuse && *mat_n) { 6606 PetscInt mrows,mcols,mnrows,mncols; 6607 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6608 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6609 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6610 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6611 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6612 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6613 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6614 } 6615 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6616 PetscValidLogicalCollectiveInt(mat,bs,0); 6617 6618 /* prepare IS for sending if not provided */ 6619 if (!is_sends) { 6620 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6621 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6622 } else { 6623 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6624 is_sends_internal = is_sends; 6625 } 6626 6627 /* get comm */ 6628 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6629 6630 /* compute number of sends */ 6631 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6632 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6633 6634 /* compute number of receives */ 6635 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6636 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6637 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6638 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6639 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6640 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6641 ierr = PetscFree(iflags);CHKERRQ(ierr); 6642 6643 /* restrict comm if requested */ 6644 subcomm = 0; 6645 destroy_mat = PETSC_FALSE; 6646 if (restrict_comm) { 6647 PetscMPIInt color,subcommsize; 6648 6649 color = 0; 6650 if (restrict_full) { 6651 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6652 } else { 6653 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6654 } 6655 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6656 subcommsize = commsize - subcommsize; 6657 /* check if reuse has been requested */ 6658 if (reuse) { 6659 if (*mat_n) { 6660 PetscMPIInt subcommsize2; 6661 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6662 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6663 comm_n = PetscObjectComm((PetscObject)*mat_n); 6664 } else { 6665 comm_n = PETSC_COMM_SELF; 6666 } 6667 } else { /* MAT_INITIAL_MATRIX */ 6668 PetscMPIInt rank; 6669 6670 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6671 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6672 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6673 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6674 comm_n = PetscSubcommChild(subcomm); 6675 } 6676 /* flag to destroy *mat_n if not significative */ 6677 if (color) destroy_mat = PETSC_TRUE; 6678 } else { 6679 comm_n = comm; 6680 } 6681 6682 /* prepare send/receive buffers */ 6683 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6684 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6685 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6686 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6687 if (nis) { 6688 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6689 } 6690 6691 /* Get data from local matrices */ 6692 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6693 /* TODO: See below some guidelines on how to prepare the local buffers */ 6694 /* 6695 send_buffer_vals should contain the raw values of the local matrix 6696 send_buffer_idxs should contain: 6697 - MatType_PRIVATE type 6698 - PetscInt size_of_l2gmap 6699 - PetscInt global_row_indices[size_of_l2gmap] 6700 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6701 */ 6702 else { 6703 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6704 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6705 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6706 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6707 send_buffer_idxs[1] = i; 6708 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6709 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6710 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6711 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6712 for (i=0;i<n_sends;i++) { 6713 ilengths_vals[is_indices[i]] = len*len; 6714 ilengths_idxs[is_indices[i]] = len+2; 6715 } 6716 } 6717 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6718 /* additional is (if any) */ 6719 if (nis) { 6720 PetscMPIInt psum; 6721 PetscInt j; 6722 for (j=0,psum=0;j<nis;j++) { 6723 PetscInt plen; 6724 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6725 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6726 psum += len+1; /* indices + lenght */ 6727 } 6728 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6729 for (j=0,psum=0;j<nis;j++) { 6730 PetscInt plen; 6731 const PetscInt *is_array_idxs; 6732 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6733 send_buffer_idxs_is[psum] = plen; 6734 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6735 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6736 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6737 psum += plen+1; /* indices + lenght */ 6738 } 6739 for (i=0;i<n_sends;i++) { 6740 ilengths_idxs_is[is_indices[i]] = psum; 6741 } 6742 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6743 } 6744 6745 buf_size_idxs = 0; 6746 buf_size_vals = 0; 6747 buf_size_idxs_is = 0; 6748 buf_size_vecs = 0; 6749 for (i=0;i<n_recvs;i++) { 6750 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6751 buf_size_vals += (PetscInt)olengths_vals[i]; 6752 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6753 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6754 } 6755 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6756 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6757 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6758 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6759 6760 /* get new tags for clean communications */ 6761 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6762 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6763 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6764 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6765 6766 /* allocate for requests */ 6767 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6768 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6769 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6770 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6771 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6772 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6773 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6774 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6775 6776 /* communications */ 6777 ptr_idxs = recv_buffer_idxs; 6778 ptr_vals = recv_buffer_vals; 6779 ptr_idxs_is = recv_buffer_idxs_is; 6780 ptr_vecs = recv_buffer_vecs; 6781 for (i=0;i<n_recvs;i++) { 6782 source_dest = onodes[i]; 6783 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6784 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6785 ptr_idxs += olengths_idxs[i]; 6786 ptr_vals += olengths_vals[i]; 6787 if (nis) { 6788 source_dest = onodes_is[i]; 6789 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6790 ptr_idxs_is += olengths_idxs_is[i]; 6791 } 6792 if (nvecs) { 6793 source_dest = onodes[i]; 6794 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6795 ptr_vecs += olengths_idxs[i]-2; 6796 } 6797 } 6798 for (i=0;i<n_sends;i++) { 6799 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6800 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6801 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6802 if (nis) { 6803 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6804 } 6805 if (nvecs) { 6806 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6807 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6808 } 6809 } 6810 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6811 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6812 6813 /* assemble new l2g map */ 6814 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6815 ptr_idxs = recv_buffer_idxs; 6816 new_local_rows = 0; 6817 for (i=0;i<n_recvs;i++) { 6818 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6819 ptr_idxs += olengths_idxs[i]; 6820 } 6821 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6822 ptr_idxs = recv_buffer_idxs; 6823 new_local_rows = 0; 6824 for (i=0;i<n_recvs;i++) { 6825 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6826 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6827 ptr_idxs += olengths_idxs[i]; 6828 } 6829 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 6830 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 6831 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 6832 6833 /* infer new local matrix type from received local matrices type */ 6834 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 6835 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 6836 if (n_recvs) { 6837 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 6838 ptr_idxs = recv_buffer_idxs; 6839 for (i=0;i<n_recvs;i++) { 6840 if ((PetscInt)new_local_type_private != *ptr_idxs) { 6841 new_local_type_private = MATAIJ_PRIVATE; 6842 break; 6843 } 6844 ptr_idxs += olengths_idxs[i]; 6845 } 6846 switch (new_local_type_private) { 6847 case MATDENSE_PRIVATE: 6848 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 6849 new_local_type = MATSEQAIJ; 6850 bs = 1; 6851 } else { /* if I receive only 1 dense matrix */ 6852 new_local_type = MATSEQDENSE; 6853 bs = 1; 6854 } 6855 break; 6856 case MATAIJ_PRIVATE: 6857 new_local_type = MATSEQAIJ; 6858 bs = 1; 6859 break; 6860 case MATBAIJ_PRIVATE: 6861 new_local_type = MATSEQBAIJ; 6862 break; 6863 case MATSBAIJ_PRIVATE: 6864 new_local_type = MATSEQSBAIJ; 6865 break; 6866 default: 6867 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 6868 break; 6869 } 6870 } else { /* by default, new_local_type is seqdense */ 6871 new_local_type = MATSEQDENSE; 6872 bs = 1; 6873 } 6874 6875 /* create MATIS object if needed */ 6876 if (!reuse) { 6877 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 6878 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6879 } else { 6880 /* it also destroys the local matrices */ 6881 if (*mat_n) { 6882 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 6883 } else { /* this is a fake object */ 6884 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6885 } 6886 } 6887 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 6888 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 6889 6890 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6891 6892 /* Global to local map of received indices */ 6893 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 6894 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 6895 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 6896 6897 /* restore attributes -> type of incoming data and its size */ 6898 buf_size_idxs = 0; 6899 for (i=0;i<n_recvs;i++) { 6900 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 6901 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 6902 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6903 } 6904 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 6905 6906 /* set preallocation */ 6907 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 6908 if (!newisdense) { 6909 PetscInt *new_local_nnz=0; 6910 6911 ptr_idxs = recv_buffer_idxs_local; 6912 if (n_recvs) { 6913 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 6914 } 6915 for (i=0;i<n_recvs;i++) { 6916 PetscInt j; 6917 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 6918 for (j=0;j<*(ptr_idxs+1);j++) { 6919 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 6920 } 6921 } else { 6922 /* TODO */ 6923 } 6924 ptr_idxs += olengths_idxs[i]; 6925 } 6926 if (new_local_nnz) { 6927 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 6928 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 6929 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 6930 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6931 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 6932 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6933 } else { 6934 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6935 } 6936 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 6937 } else { 6938 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6939 } 6940 6941 /* set values */ 6942 ptr_vals = recv_buffer_vals; 6943 ptr_idxs = recv_buffer_idxs_local; 6944 for (i=0;i<n_recvs;i++) { 6945 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 6946 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 6947 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 6948 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6949 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6950 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 6951 } else { 6952 /* TODO */ 6953 } 6954 ptr_idxs += olengths_idxs[i]; 6955 ptr_vals += olengths_vals[i]; 6956 } 6957 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6958 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6959 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6960 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6961 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 6962 6963 #if 0 6964 if (!restrict_comm) { /* check */ 6965 Vec lvec,rvec; 6966 PetscReal infty_error; 6967 6968 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 6969 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 6970 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 6971 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 6972 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 6973 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6974 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 6975 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 6976 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 6977 } 6978 #endif 6979 6980 /* assemble new additional is (if any) */ 6981 if (nis) { 6982 PetscInt **temp_idxs,*count_is,j,psum; 6983 6984 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6985 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 6986 ptr_idxs = recv_buffer_idxs_is; 6987 psum = 0; 6988 for (i=0;i<n_recvs;i++) { 6989 for (j=0;j<nis;j++) { 6990 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6991 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 6992 psum += plen; 6993 ptr_idxs += plen+1; /* shift pointer to received data */ 6994 } 6995 } 6996 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 6997 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 6998 for (i=1;i<nis;i++) { 6999 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7000 } 7001 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7002 ptr_idxs = recv_buffer_idxs_is; 7003 for (i=0;i<n_recvs;i++) { 7004 for (j=0;j<nis;j++) { 7005 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7006 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7007 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7008 ptr_idxs += plen+1; /* shift pointer to received data */ 7009 } 7010 } 7011 for (i=0;i<nis;i++) { 7012 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7013 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7014 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7015 } 7016 ierr = PetscFree(count_is);CHKERRQ(ierr); 7017 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7018 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7019 } 7020 /* free workspace */ 7021 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7022 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7023 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7024 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7025 if (isdense) { 7026 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7027 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7028 } else { 7029 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7030 } 7031 if (nis) { 7032 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7033 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7034 } 7035 7036 if (nvecs) { 7037 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7038 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7039 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7040 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7041 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7042 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7043 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7044 /* set values */ 7045 ptr_vals = recv_buffer_vecs; 7046 ptr_idxs = recv_buffer_idxs_local; 7047 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7048 for (i=0;i<n_recvs;i++) { 7049 PetscInt j; 7050 for (j=0;j<*(ptr_idxs+1);j++) { 7051 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7052 } 7053 ptr_idxs += olengths_idxs[i]; 7054 ptr_vals += olengths_idxs[i]-2; 7055 } 7056 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7057 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7058 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7059 } 7060 7061 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7062 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7063 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7064 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7065 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7066 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7067 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7068 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7069 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7070 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7071 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7072 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7073 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7074 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7075 ierr = PetscFree(onodes);CHKERRQ(ierr); 7076 if (nis) { 7077 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7078 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7079 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7080 } 7081 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7082 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7083 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7084 for (i=0;i<nis;i++) { 7085 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7086 } 7087 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7088 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7089 } 7090 *mat_n = NULL; 7091 } 7092 PetscFunctionReturn(0); 7093 } 7094 7095 /* temporary hack into ksp private data structure */ 7096 #include <petsc/private/kspimpl.h> 7097 7098 #undef __FUNCT__ 7099 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 7100 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7101 { 7102 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7103 PC_IS *pcis = (PC_IS*)pc->data; 7104 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7105 Mat coarsedivudotp = NULL; 7106 Mat coarseG,t_coarse_mat_is; 7107 MatNullSpace CoarseNullSpace = NULL; 7108 ISLocalToGlobalMapping coarse_islg; 7109 IS coarse_is,*isarray; 7110 PetscInt i,im_active=-1,active_procs=-1; 7111 PetscInt nis,nisdofs,nisneu,nisvert; 7112 PC pc_temp; 7113 PCType coarse_pc_type; 7114 KSPType coarse_ksp_type; 7115 PetscBool multilevel_requested,multilevel_allowed; 7116 PetscBool isredundant,isbddc,isnn,coarse_reuse; 7117 PetscInt ncoarse,nedcfield; 7118 PetscBool compute_vecs = PETSC_FALSE; 7119 PetscScalar *array; 7120 MatReuse coarse_mat_reuse; 7121 PetscBool restr, full_restr, have_void; 7122 PetscErrorCode ierr; 7123 7124 PetscFunctionBegin; 7125 /* Assign global numbering to coarse dofs */ 7126 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7127 PetscInt ocoarse_size; 7128 compute_vecs = PETSC_TRUE; 7129 ocoarse_size = pcbddc->coarse_size; 7130 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7131 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7132 /* see if we can avoid some work */ 7133 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7134 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7135 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7136 PC pc; 7137 PetscBool isbddc; 7138 7139 /* temporary workaround since PCBDDC does not have a reset method so far */ 7140 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 7141 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 7142 if (isbddc) { 7143 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 7144 } else { 7145 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7146 } 7147 coarse_reuse = PETSC_FALSE; 7148 } else { /* we can safely reuse already computed coarse matrix */ 7149 coarse_reuse = PETSC_TRUE; 7150 } 7151 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7152 coarse_reuse = PETSC_FALSE; 7153 } 7154 /* reset any subassembling information */ 7155 if (!coarse_reuse || pcbddc->recompute_topography) { 7156 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7157 } 7158 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7159 coarse_reuse = PETSC_TRUE; 7160 } 7161 /* assemble coarse matrix */ 7162 if (coarse_reuse && pcbddc->coarse_ksp) { 7163 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7164 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7165 coarse_mat_reuse = MAT_REUSE_MATRIX; 7166 } else { 7167 coarse_mat = NULL; 7168 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7169 } 7170 7171 /* creates temporary l2gmap and IS for coarse indexes */ 7172 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7173 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7174 7175 /* creates temporary MATIS object for coarse matrix */ 7176 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7177 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7178 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7179 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7180 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7181 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7182 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7183 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7184 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7185 7186 /* count "active" (i.e. with positive local size) and "void" processes */ 7187 im_active = !!(pcis->n); 7188 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7189 7190 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7191 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7192 /* full_restr : just use the receivers from the subassembling pattern */ 7193 coarse_mat_is = NULL; 7194 multilevel_allowed = PETSC_FALSE; 7195 multilevel_requested = PETSC_FALSE; 7196 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7197 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7198 if (multilevel_requested) { 7199 ncoarse = active_procs/pcbddc->coarsening_ratio; 7200 restr = PETSC_FALSE; 7201 full_restr = PETSC_FALSE; 7202 } else { 7203 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7204 restr = PETSC_TRUE; 7205 full_restr = PETSC_TRUE; 7206 } 7207 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7208 ncoarse = PetscMax(1,ncoarse); 7209 if (!pcbddc->coarse_subassembling) { 7210 if (pcbddc->coarsening_ratio > 1) { 7211 ierr = MatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7212 } else { 7213 PetscMPIInt size,rank; 7214 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 7215 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7216 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 7217 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7218 } 7219 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7220 PetscInt psum; 7221 PetscMPIInt size; 7222 if (pcbddc->coarse_ksp) psum = 1; 7223 else psum = 0; 7224 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7225 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 7226 if (ncoarse < size) have_void = PETSC_TRUE; 7227 } 7228 /* determine if we can go multilevel */ 7229 if (multilevel_requested) { 7230 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7231 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7232 } 7233 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7234 7235 /* dump subassembling pattern */ 7236 if (pcbddc->dbg_flag && multilevel_allowed) { 7237 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7238 } 7239 7240 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7241 nedcfield = -1; 7242 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7243 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7244 const PetscInt *idxs; 7245 ISLocalToGlobalMapping tmap; 7246 7247 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7248 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7249 /* allocate space for temporary storage */ 7250 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7251 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7252 /* allocate for IS array */ 7253 nisdofs = pcbddc->n_ISForDofsLocal; 7254 if (pcbddc->nedclocal) { 7255 if (pcbddc->nedfield > -1) { 7256 nedcfield = pcbddc->nedfield; 7257 } else { 7258 nedcfield = 0; 7259 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7260 nisdofs = 1; 7261 } 7262 } 7263 nisneu = !!pcbddc->NeumannBoundariesLocal; 7264 nisvert = 0; /* nisvert is not used */ 7265 nis = nisdofs + nisneu + nisvert; 7266 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7267 /* dofs splitting */ 7268 for (i=0;i<nisdofs;i++) { 7269 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7270 if (nedcfield != i) { 7271 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7272 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7273 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7274 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7275 } else { 7276 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7277 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7278 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7279 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7280 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7281 } 7282 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7283 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7284 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7285 } 7286 /* neumann boundaries */ 7287 if (pcbddc->NeumannBoundariesLocal) { 7288 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7289 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7290 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7291 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7292 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7293 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7294 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7295 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7296 } 7297 /* free memory */ 7298 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7299 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7300 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7301 } else { 7302 nis = 0; 7303 nisdofs = 0; 7304 nisneu = 0; 7305 nisvert = 0; 7306 isarray = NULL; 7307 } 7308 /* destroy no longer needed map */ 7309 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7310 7311 /* subassemble */ 7312 if (multilevel_allowed) { 7313 Vec vp[1]; 7314 PetscInt nvecs = 0; 7315 PetscBool reuse,reuser; 7316 7317 if (coarse_mat) reuse = PETSC_TRUE; 7318 else reuse = PETSC_FALSE; 7319 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7320 vp[0] = NULL; 7321 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7322 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7323 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7324 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7325 nvecs = 1; 7326 7327 if (pcbddc->divudotp) { 7328 Mat B,loc_divudotp; 7329 Vec v,p; 7330 IS dummy; 7331 PetscInt np; 7332 7333 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7334 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7335 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7336 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7337 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7338 ierr = VecSet(p,1.);CHKERRQ(ierr); 7339 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7340 ierr = VecDestroy(&p);CHKERRQ(ierr); 7341 ierr = MatDestroy(&B);CHKERRQ(ierr); 7342 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7343 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7344 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7345 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7346 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7347 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7348 ierr = VecDestroy(&v);CHKERRQ(ierr); 7349 } 7350 } 7351 if (reuser) { 7352 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7353 } else { 7354 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7355 } 7356 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7357 PetscScalar *arraym,*arrayv; 7358 PetscInt nl; 7359 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7360 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7361 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7362 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7363 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7364 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7365 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7366 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7367 } else { 7368 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7369 } 7370 } else { 7371 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7372 } 7373 if (coarse_mat_is || coarse_mat) { 7374 PetscMPIInt size; 7375 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7376 if (!multilevel_allowed) { 7377 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7378 } else { 7379 Mat A; 7380 7381 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7382 if (coarse_mat_is) { 7383 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7384 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7385 coarse_mat = coarse_mat_is; 7386 } 7387 /* be sure we don't have MatSeqDENSE as local mat */ 7388 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7389 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7390 } 7391 } 7392 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7393 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7394 7395 /* create local to global scatters for coarse problem */ 7396 if (compute_vecs) { 7397 PetscInt lrows; 7398 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7399 if (coarse_mat) { 7400 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7401 } else { 7402 lrows = 0; 7403 } 7404 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7405 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7406 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7407 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7408 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7409 } 7410 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7411 7412 /* set defaults for coarse KSP and PC */ 7413 if (multilevel_allowed) { 7414 coarse_ksp_type = KSPRICHARDSON; 7415 coarse_pc_type = PCBDDC; 7416 } else { 7417 coarse_ksp_type = KSPPREONLY; 7418 coarse_pc_type = PCREDUNDANT; 7419 } 7420 7421 /* print some info if requested */ 7422 if (pcbddc->dbg_flag) { 7423 if (!multilevel_allowed) { 7424 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7425 if (multilevel_requested) { 7426 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7427 } else if (pcbddc->max_levels) { 7428 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7429 } 7430 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7431 } 7432 } 7433 7434 /* communicate coarse discrete gradient */ 7435 coarseG = NULL; 7436 if (pcbddc->nedcG && multilevel_allowed) { 7437 MPI_Comm ccomm; 7438 if (coarse_mat) { 7439 ccomm = PetscObjectComm((PetscObject)coarse_mat); 7440 } else { 7441 ccomm = MPI_COMM_NULL; 7442 } 7443 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 7444 } 7445 7446 /* create the coarse KSP object only once with defaults */ 7447 if (coarse_mat) { 7448 PetscViewer dbg_viewer = NULL; 7449 if (pcbddc->dbg_flag) { 7450 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7451 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7452 } 7453 if (!pcbddc->coarse_ksp) { 7454 char prefix[256],str_level[16]; 7455 size_t len; 7456 7457 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7458 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7459 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7460 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7461 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7462 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7463 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7464 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7465 /* TODO is this logic correct? should check for coarse_mat type */ 7466 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7467 /* prefix */ 7468 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7469 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7470 if (!pcbddc->current_level) { 7471 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7472 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7473 } else { 7474 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7475 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7476 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7477 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7478 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7479 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7480 } 7481 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7482 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7483 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7484 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7485 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7486 /* allow user customization */ 7487 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7488 } 7489 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7490 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7491 if (nisdofs) { 7492 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7493 for (i=0;i<nisdofs;i++) { 7494 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7495 } 7496 } 7497 if (nisneu) { 7498 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7499 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7500 } 7501 if (nisvert) { 7502 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7503 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7504 } 7505 if (coarseG) { 7506 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 7507 } 7508 7509 /* get some info after set from options */ 7510 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7511 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7512 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7513 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 7514 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7515 isbddc = PETSC_FALSE; 7516 } 7517 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7518 if (isredundant) { 7519 KSP inner_ksp; 7520 PC inner_pc; 7521 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7522 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7523 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7524 } 7525 7526 /* parameters which miss an API */ 7527 if (isbddc) { 7528 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7529 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7530 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7531 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7532 if (pcbddc_coarse->benign_saddle_point) { 7533 Mat coarsedivudotp_is; 7534 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7535 IS row,col; 7536 const PetscInt *gidxs; 7537 PetscInt n,st,M,N; 7538 7539 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7540 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7541 st = st-n; 7542 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7543 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7544 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7545 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7546 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7547 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7548 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7549 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7550 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7551 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7552 ierr = ISDestroy(&row);CHKERRQ(ierr); 7553 ierr = ISDestroy(&col);CHKERRQ(ierr); 7554 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7555 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7556 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7557 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7558 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7559 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7560 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7561 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7562 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7563 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7564 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7565 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7566 } 7567 } 7568 7569 /* propagate symmetry info of coarse matrix */ 7570 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7571 if (pc->pmat->symmetric_set) { 7572 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7573 } 7574 if (pc->pmat->hermitian_set) { 7575 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7576 } 7577 if (pc->pmat->spd_set) { 7578 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7579 } 7580 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7581 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7582 } 7583 /* set operators */ 7584 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7585 if (pcbddc->dbg_flag) { 7586 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7587 } 7588 } 7589 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 7590 ierr = PetscFree(isarray);CHKERRQ(ierr); 7591 #if 0 7592 { 7593 PetscViewer viewer; 7594 char filename[256]; 7595 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7596 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7597 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7598 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7599 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7600 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7601 } 7602 #endif 7603 7604 if (pcbddc->coarse_ksp) { 7605 Vec crhs,csol; 7606 7607 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7608 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7609 if (!csol) { 7610 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7611 } 7612 if (!crhs) { 7613 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7614 } 7615 } 7616 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7617 7618 /* compute null space for coarse solver if the benign trick has been requested */ 7619 if (pcbddc->benign_null) { 7620 7621 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7622 for (i=0;i<pcbddc->benign_n;i++) { 7623 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7624 } 7625 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7626 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7627 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7628 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7629 if (coarse_mat) { 7630 Vec nullv; 7631 PetscScalar *array,*array2; 7632 PetscInt nl; 7633 7634 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7635 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7636 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7637 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7638 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7639 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7640 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7641 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7642 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7643 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7644 } 7645 } 7646 7647 if (pcbddc->coarse_ksp) { 7648 PetscBool ispreonly; 7649 7650 if (CoarseNullSpace) { 7651 PetscBool isnull; 7652 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7653 if (isnull) { 7654 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7655 } 7656 /* TODO: add local nullspaces (if any) */ 7657 } 7658 /* setup coarse ksp */ 7659 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7660 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7661 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7662 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7663 KSP check_ksp; 7664 KSPType check_ksp_type; 7665 PC check_pc; 7666 Vec check_vec,coarse_vec; 7667 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7668 PetscInt its; 7669 PetscBool compute_eigs; 7670 PetscReal *eigs_r,*eigs_c; 7671 PetscInt neigs; 7672 const char *prefix; 7673 7674 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7675 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7676 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7677 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7678 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7679 /* prevent from setup unneeded object */ 7680 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7681 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7682 if (ispreonly) { 7683 check_ksp_type = KSPPREONLY; 7684 compute_eigs = PETSC_FALSE; 7685 } else { 7686 check_ksp_type = KSPGMRES; 7687 compute_eigs = PETSC_TRUE; 7688 } 7689 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7690 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7691 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7692 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7693 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7694 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7695 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7696 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7697 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7698 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7699 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7700 /* create random vec */ 7701 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7702 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7703 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7704 /* solve coarse problem */ 7705 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7706 /* set eigenvalue estimation if preonly has not been requested */ 7707 if (compute_eigs) { 7708 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7709 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7710 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7711 if (neigs) { 7712 lambda_max = eigs_r[neigs-1]; 7713 lambda_min = eigs_r[0]; 7714 if (pcbddc->use_coarse_estimates) { 7715 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7716 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7717 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7718 } 7719 } 7720 } 7721 } 7722 7723 /* check coarse problem residual error */ 7724 if (pcbddc->dbg_flag) { 7725 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7726 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7727 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7728 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7729 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7730 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7731 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7732 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7733 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7734 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7735 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7736 if (CoarseNullSpace) { 7737 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7738 } 7739 if (compute_eigs) { 7740 PetscReal lambda_max_s,lambda_min_s; 7741 KSPConvergedReason reason; 7742 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7743 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7744 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 7745 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7746 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7747 for (i=0;i<neigs;i++) { 7748 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7749 } 7750 } 7751 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7752 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7753 } 7754 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7755 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7756 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7757 if (compute_eigs) { 7758 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7759 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7760 } 7761 } 7762 } 7763 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7764 /* print additional info */ 7765 if (pcbddc->dbg_flag) { 7766 /* waits until all processes reaches this point */ 7767 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7768 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7769 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7770 } 7771 7772 /* free memory */ 7773 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7774 PetscFunctionReturn(0); 7775 } 7776 7777 #undef __FUNCT__ 7778 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 7779 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7780 { 7781 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7782 PC_IS* pcis = (PC_IS*)pc->data; 7783 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7784 IS subset,subset_mult,subset_n; 7785 PetscInt local_size,coarse_size=0; 7786 PetscInt *local_primal_indices=NULL; 7787 const PetscInt *t_local_primal_indices; 7788 PetscErrorCode ierr; 7789 7790 PetscFunctionBegin; 7791 /* Compute global number of coarse dofs */ 7792 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7793 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7794 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7795 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7796 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7797 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7798 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7799 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7800 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7801 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7802 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7803 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7804 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7805 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7806 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7807 7808 /* check numbering */ 7809 if (pcbddc->dbg_flag) { 7810 PetscScalar coarsesum,*array,*array2; 7811 PetscInt i; 7812 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7813 7814 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7815 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7816 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7817 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7818 /* counter */ 7819 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7820 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7821 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7822 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7823 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7824 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7825 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7826 for (i=0;i<pcbddc->local_primal_size;i++) { 7827 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7828 } 7829 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7830 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7831 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7832 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7833 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7834 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7835 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7836 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7837 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7838 for (i=0;i<pcis->n;i++) { 7839 if (array[i] != 0.0 && array[i] != array2[i]) { 7840 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 7841 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 7842 set_error = PETSC_TRUE; 7843 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 7844 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 7845 } 7846 } 7847 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7848 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7849 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7850 for (i=0;i<pcis->n;i++) { 7851 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 7852 } 7853 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7854 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7855 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7856 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7857 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 7858 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 7859 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 7860 PetscInt *gidxs; 7861 7862 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 7863 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 7864 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 7865 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7866 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 7867 for (i=0;i<pcbddc->local_primal_size;i++) { 7868 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 7869 } 7870 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7871 ierr = PetscFree(gidxs);CHKERRQ(ierr); 7872 } 7873 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7874 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7875 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 7876 } 7877 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 7878 /* get back data */ 7879 *coarse_size_n = coarse_size; 7880 *local_primal_indices_n = local_primal_indices; 7881 PetscFunctionReturn(0); 7882 } 7883 7884 #undef __FUNCT__ 7885 #define __FUNCT__ "PCBDDCGlobalToLocal" 7886 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 7887 { 7888 IS localis_t; 7889 PetscInt i,lsize,*idxs,n; 7890 PetscScalar *vals; 7891 PetscErrorCode ierr; 7892 7893 PetscFunctionBegin; 7894 /* get indices in local ordering exploiting local to global map */ 7895 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 7896 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 7897 for (i=0;i<lsize;i++) vals[i] = 1.0; 7898 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7899 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 7900 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 7901 if (idxs) { /* multilevel guard */ 7902 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 7903 } 7904 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 7905 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7906 ierr = PetscFree(vals);CHKERRQ(ierr); 7907 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 7908 /* now compute set in local ordering */ 7909 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7910 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7911 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7912 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 7913 for (i=0,lsize=0;i<n;i++) { 7914 if (PetscRealPart(vals[i]) > 0.5) { 7915 lsize++; 7916 } 7917 } 7918 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 7919 for (i=0,lsize=0;i<n;i++) { 7920 if (PetscRealPart(vals[i]) > 0.5) { 7921 idxs[lsize++] = i; 7922 } 7923 } 7924 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7925 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 7926 *localis = localis_t; 7927 PetscFunctionReturn(0); 7928 } 7929 7930 #undef __FUNCT__ 7931 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 7932 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 7933 { 7934 PC_IS *pcis=(PC_IS*)pc->data; 7935 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7936 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 7937 Mat S_j; 7938 PetscInt *used_xadj,*used_adjncy; 7939 PetscBool free_used_adj; 7940 PetscErrorCode ierr; 7941 7942 PetscFunctionBegin; 7943 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 7944 free_used_adj = PETSC_FALSE; 7945 if (pcbddc->sub_schurs_layers == -1) { 7946 used_xadj = NULL; 7947 used_adjncy = NULL; 7948 } else { 7949 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 7950 used_xadj = pcbddc->mat_graph->xadj; 7951 used_adjncy = pcbddc->mat_graph->adjncy; 7952 } else if (pcbddc->computed_rowadj) { 7953 used_xadj = pcbddc->mat_graph->xadj; 7954 used_adjncy = pcbddc->mat_graph->adjncy; 7955 } else { 7956 PetscBool flg_row=PETSC_FALSE; 7957 const PetscInt *xadj,*adjncy; 7958 PetscInt nvtxs; 7959 7960 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7961 if (flg_row) { 7962 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 7963 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 7964 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 7965 free_used_adj = PETSC_TRUE; 7966 } else { 7967 pcbddc->sub_schurs_layers = -1; 7968 used_xadj = NULL; 7969 used_adjncy = NULL; 7970 } 7971 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7972 } 7973 } 7974 7975 /* setup sub_schurs data */ 7976 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7977 if (!sub_schurs->schur_explicit) { 7978 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 7979 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7980 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 7981 } else { 7982 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 7983 PetscBool isseqaij,need_change = PETSC_FALSE; 7984 PetscInt benign_n; 7985 Mat change = NULL; 7986 Vec scaling = NULL; 7987 IS change_primal = NULL; 7988 7989 if (!pcbddc->use_vertices && reuse_solvers) { 7990 PetscInt n_vertices; 7991 7992 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 7993 reuse_solvers = (PetscBool)!n_vertices; 7994 } 7995 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 7996 if (!isseqaij) { 7997 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7998 if (matis->A == pcbddc->local_mat) { 7999 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8000 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8001 } else { 8002 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8003 } 8004 } 8005 if (!pcbddc->benign_change_explicit) { 8006 benign_n = pcbddc->benign_n; 8007 } else { 8008 benign_n = 0; 8009 } 8010 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8011 We need a global reduction to avoid possible deadlocks. 8012 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8013 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8014 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8015 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8016 need_change = (PetscBool)(!need_change); 8017 } 8018 /* If the user defines additional constraints, we import them here. 8019 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8020 if (need_change) { 8021 PC_IS *pcisf; 8022 PC_BDDC *pcbddcf; 8023 PC pcf; 8024 8025 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8026 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8027 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8028 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8029 /* hacks */ 8030 pcisf = (PC_IS*)pcf->data; 8031 pcisf->is_B_local = pcis->is_B_local; 8032 pcisf->vec1_N = pcis->vec1_N; 8033 pcisf->BtoNmap = pcis->BtoNmap; 8034 pcisf->n = pcis->n; 8035 pcisf->n_B = pcis->n_B; 8036 pcbddcf = (PC_BDDC*)pcf->data; 8037 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8038 pcbddcf->mat_graph = pcbddc->mat_graph; 8039 pcbddcf->use_faces = PETSC_TRUE; 8040 pcbddcf->use_change_of_basis = PETSC_TRUE; 8041 pcbddcf->use_change_on_faces = PETSC_TRUE; 8042 pcbddcf->use_qr_single = PETSC_TRUE; 8043 pcbddcf->fake_change = PETSC_TRUE; 8044 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8045 /* store information on primal vertices and change of basis (in local numbering) */ 8046 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8047 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8048 change = pcbddcf->ConstraintMatrix; 8049 pcbddcf->ConstraintMatrix = NULL; 8050 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8051 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8052 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8053 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8054 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8055 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8056 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8057 pcf->ops->destroy = NULL; 8058 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8059 } 8060 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8061 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8062 ierr = MatDestroy(&change);CHKERRQ(ierr); 8063 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8064 } 8065 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8066 8067 /* free adjacency */ 8068 if (free_used_adj) { 8069 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8070 } 8071 PetscFunctionReturn(0); 8072 } 8073 8074 #undef __FUNCT__ 8075 #define __FUNCT__ "PCBDDCInitSubSchurs" 8076 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8077 { 8078 PC_IS *pcis=(PC_IS*)pc->data; 8079 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8080 PCBDDCGraph graph; 8081 PetscErrorCode ierr; 8082 8083 PetscFunctionBegin; 8084 /* attach interface graph for determining subsets */ 8085 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8086 IS verticesIS,verticescomm; 8087 PetscInt vsize,*idxs; 8088 8089 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8090 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8091 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8092 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8093 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8094 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8095 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8096 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8097 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8098 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8099 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8100 } else { 8101 graph = pcbddc->mat_graph; 8102 } 8103 /* print some info */ 8104 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8105 IS vertices; 8106 PetscInt nv,nedges,nfaces; 8107 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8108 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8109 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8110 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8111 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8112 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8113 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8114 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8115 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8116 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8117 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8118 } 8119 8120 /* sub_schurs init */ 8121 if (!pcbddc->sub_schurs) { 8122 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8123 } 8124 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8125 8126 /* free graph struct */ 8127 if (pcbddc->sub_schurs_rebuild) { 8128 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8129 } 8130 PetscFunctionReturn(0); 8131 } 8132 8133 #undef __FUNCT__ 8134 #define __FUNCT__ "PCBDDCCheckOperator" 8135 PetscErrorCode PCBDDCCheckOperator(PC pc) 8136 { 8137 PC_IS *pcis=(PC_IS*)pc->data; 8138 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8139 PetscErrorCode ierr; 8140 8141 PetscFunctionBegin; 8142 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8143 IS zerodiag = NULL; 8144 Mat S_j,B0_B=NULL; 8145 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8146 PetscScalar *p0_check,*array,*array2; 8147 PetscReal norm; 8148 PetscInt i; 8149 8150 /* B0 and B0_B */ 8151 if (zerodiag) { 8152 IS dummy; 8153 8154 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8155 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8156 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8157 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8158 } 8159 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8160 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8161 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8162 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8163 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8164 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8165 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8166 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8167 /* S_j */ 8168 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8169 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8170 8171 /* mimic vector in \widetilde{W}_\Gamma */ 8172 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8173 /* continuous in primal space */ 8174 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8175 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8176 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8177 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8178 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8179 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8180 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8181 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8182 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8183 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8184 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8185 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8186 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8187 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8188 8189 /* assemble rhs for coarse problem */ 8190 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8191 /* local with Schur */ 8192 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8193 if (zerodiag) { 8194 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8195 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8196 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8197 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8198 } 8199 /* sum on primal nodes the local contributions */ 8200 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8201 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8202 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8203 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8204 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8205 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8206 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8207 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8208 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8209 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8210 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8211 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8212 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8213 /* scale primal nodes (BDDC sums contibutions) */ 8214 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8215 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8216 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8217 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8218 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8219 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8220 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8221 /* global: \widetilde{B0}_B w_\Gamma */ 8222 if (zerodiag) { 8223 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8224 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8225 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8226 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8227 } 8228 /* BDDC */ 8229 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8230 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8231 8232 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8233 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8234 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8235 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8236 for (i=0;i<pcbddc->benign_n;i++) { 8237 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8238 } 8239 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8240 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8241 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8242 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8243 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8244 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8245 } 8246 PetscFunctionReturn(0); 8247 } 8248 8249 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8250 #undef __FUNCT__ 8251 #define __FUNCT__ "MatMPIAIJRestrict" 8252 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8253 { 8254 Mat At; 8255 IS rows; 8256 PetscInt rst,ren; 8257 PetscErrorCode ierr; 8258 PetscLayout rmap; 8259 8260 PetscFunctionBegin; 8261 rst = ren = 0; 8262 if (ccomm != MPI_COMM_NULL) { 8263 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8264 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8265 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8266 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8267 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8268 } 8269 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8270 ierr = MatGetSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8271 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8272 8273 if (ccomm != MPI_COMM_NULL) { 8274 Mat_MPIAIJ *a,*b; 8275 IS from,to; 8276 Vec gvec; 8277 PetscInt lsize; 8278 8279 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8280 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8281 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8282 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8283 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8284 a = (Mat_MPIAIJ*)At->data; 8285 b = (Mat_MPIAIJ*)(*B)->data; 8286 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8287 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8288 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8289 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8290 b->A = a->A; 8291 b->B = a->B; 8292 8293 b->donotstash = a->donotstash; 8294 b->roworiented = a->roworiented; 8295 b->rowindices = 0; 8296 b->rowvalues = 0; 8297 b->getrowactive = PETSC_FALSE; 8298 8299 (*B)->rmap = rmap; 8300 (*B)->factortype = A->factortype; 8301 (*B)->assembled = PETSC_TRUE; 8302 (*B)->insertmode = NOT_SET_VALUES; 8303 (*B)->preallocated = PETSC_TRUE; 8304 8305 if (a->colmap) { 8306 #if defined(PETSC_USE_CTABLE) 8307 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8308 #else 8309 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8310 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8311 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8312 #endif 8313 } else b->colmap = 0; 8314 if (a->garray) { 8315 PetscInt len; 8316 len = a->B->cmap->n; 8317 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8318 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8319 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8320 } else b->garray = 0; 8321 8322 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8323 b->lvec = a->lvec; 8324 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8325 8326 /* cannot use VecScatterCopy */ 8327 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8328 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8329 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8330 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8331 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8332 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8333 ierr = ISDestroy(&from);CHKERRQ(ierr); 8334 ierr = ISDestroy(&to);CHKERRQ(ierr); 8335 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8336 } 8337 ierr = MatDestroy(&At);CHKERRQ(ierr); 8338 PetscFunctionReturn(0); 8339 } 8340