1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 #include <petsc/private/sfimpl.h> 6 7 /* returns B s.t. range(B) _|_ range(A) */ 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatDense_OrthogonalComplement" 10 PetscErrorCode MatDense_OrthogonalComplement(Mat A, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 11 { 12 #if !defined(PETSC_USE_COMPLEX) 13 PetscScalar *uwork,*data,*U, ds = 0.; 14 PetscReal *sing; 15 PetscBLASInt bM,bN,lwork,lierr,di = 1; 16 PetscInt ulw,i,nr,nc,n; 17 PetscErrorCode ierr; 18 19 PetscFunctionBegin; 20 #if defined(PETSC_MISSING_LAPACK_GESVD) 21 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 22 #endif 23 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 24 if (!nr || !nc) PetscFunctionReturn(0); 25 26 /* workspace */ 27 if (!work) { 28 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 29 ierr = PetscMalloc1(ulw,&uwork); 30 } else { 31 ulw = lw; 32 uwork = work; 33 } 34 n = PetscMin(nr,nc); 35 if (!rwork) { 36 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 37 } else { 38 sing = rwork; 39 } 40 41 /* SVD */ 42 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 43 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 44 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 45 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 46 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 47 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 48 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 49 ierr = PetscFPTrapPop();CHKERRQ(ierr); 50 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 51 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 52 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 53 if (!rwork) { 54 ierr = PetscFree(sing);CHKERRQ(ierr); 55 } 56 if (!work) { 57 ierr = PetscFree(uwork);CHKERRQ(ierr); 58 } 59 /* create B */ 60 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 61 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 62 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 63 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 64 ierr = PetscFree(U);CHKERRQ(ierr); 65 #else 66 PetscFunctionBegin; 67 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 68 #endif 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "PCBDDCComputeNedelecChangeEdge" 74 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, Mat* Gins, Mat* GKins, PetscScalar *work, PetscReal *rwork) 75 { 76 PetscErrorCode ierr; 77 Mat GE,GEd; 78 PetscInt rsize,csize,esize; 79 PetscScalar *ptr; 80 81 PetscFunctionBegin; 82 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 83 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 84 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 85 86 /* gradients */ 87 ptr = work + 5*esize; 88 ierr = MatGetSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 89 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 90 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 91 ierr = MatDestroy(&GE);CHKERRQ(ierr); 92 93 /* constants */ 94 ptr += rsize*csize; 95 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 96 ierr = MatGetSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 97 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 98 ierr = MatDestroy(&GE);CHKERRQ(ierr); 99 ierr = MatDense_OrthogonalComplement(GEd,5*esize,work,rwork,GKins);CHKERRQ(ierr); 100 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 101 PetscFunctionReturn(0); 102 } 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "PCBDDCNedelecSupport" 106 PetscErrorCode PCBDDCNedelecSupport(PC pc) 107 { 108 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 109 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 110 Mat G,T,conn,lG,lGt,lGis,lGall,lGw; 111 MatNullSpace nnsp; 112 Vec tvec,*quads; 113 PetscSF sfv; 114 ISLocalToGlobalMapping el2g,vl2g; 115 MPI_Comm comm; 116 IS lned,primals; 117 IS *eedges,*extrows,*extcols; 118 PetscBT btv,bte,btb,btvcand,iwork; 119 PetscScalar *vals,*work; 120 PetscReal *rwork; 121 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 122 PetscInt ne,nv,Ne,Nv,Le,Lv,order; 123 PetscInt n_neigh,*neigh,*n_shared,**shared; 124 PetscInt *interior; 125 PetscInt i,j,extmem,cum,maxsize,rst,nee,nquads=2; 126 PetscInt *extrow,*extrowcum,*marks,*emarks,*vmarks,*gidxs; 127 PetscInt *sfvleaves,*sfvroots; 128 PetscBool print,eerr,done,lrc[2],conforming,ismpiaij; 129 PetscErrorCode ierr; 130 131 PetscFunctionBegin; 132 /* test variable order code and print debug info TODO: to be removed */ 133 print = PETSC_FALSE; 134 ierr = PetscOptionsGetBool(NULL,NULL,"-pc_bddc_print_nedelec",&print,NULL);CHKERRQ(ierr); 135 ierr = PetscOptionsGetInt(NULL,NULL,"-pc_bddc_nedelec_order",&pcbddc->nedorder,NULL);CHKERRQ(ierr); 136 137 /* Return to caller if there are no edges in the decomposition */ 138 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 139 ierr = MatGetLocalToGlobalMapping(pc->pmat,&el2g,NULL);CHKERRQ(ierr); 140 ierr = ISLocalToGlobalMappingGetSize(el2g,&ne);CHKERRQ(ierr); 141 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 142 lrc[0] = PETSC_FALSE; 143 for (i=0;i<ne;i++) { 144 if (PetscRealPart(vals[i]) > 2.) { 145 lrc[0] = PETSC_TRUE; 146 break; 147 } 148 } 149 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 150 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 151 if (!lrc[1]) PetscFunctionReturn(0); 152 153 /* Get relevant objects */ 154 G = pcbddc->discretegradient; 155 order = pcbddc->nedorder; 156 conforming = pcbddc->conforming; 157 158 /* Compute local and global sizes of egde dofs and nodal dofs */ 159 ierr = MatGetSize(G,&Ne,&Nv);CHKERRQ(ierr); 160 ierr = MatGetLocalSize(G,&Le,&Lv);CHKERRQ(ierr); 161 162 /* Sanity checks */ 163 if (!order) SETERRQ(comm,PETSC_ERR_SUP,"Variable order not yet implemented"); 164 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 165 ierr = PetscObjectTypeCompare((PetscObject)G,MATMPIAIJ,&ismpiaij);CHKERRQ(ierr); 166 if (!ismpiaij) SETERRQ1(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with discrete gradient of type %s. Please use MPIAIJ",((PetscObject)G)->type_name); 167 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 168 if (order && Ne%order) SETERRQ2(comm,PETSC_ERR_USER,"The number of edge dofs %d it's not a multiple of the order %d",Ne,order); 169 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 170 ierr = MatGetSize(pc->pmat,&i,NULL);CHKERRQ(ierr); 171 if (Ne != i) SETERRQ2(comm,PETSC_ERR_USER,"Global matrix sizes should match! Number of rows of G %d differs from that of A %d",Ne,i); 172 ierr = MatGetLocalSize(pc->pmat,&i,NULL);CHKERRQ(ierr); 173 if (Le != i) SETERRQ2(comm,PETSC_ERR_USER,"Local matrix sizes should match! Number of local rows of G %d differs from that of A %d",Le,i); 174 175 /* Drop connections for interior edges (this modifies G) */ 176 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 177 ierr = VecScatterBegin(matis->rctx,matis->counter,tvec,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 178 ierr = VecScatterEnd(matis->rctx,matis->counter,tvec,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 179 ierr = PetscMalloc1(Le,&marks);CHKERRQ(ierr); 180 ierr = VecGetOwnershipRange(tvec,&rst,NULL);CHKERRQ(ierr); 181 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&vals);CHKERRQ(ierr); 182 cum = 0; 183 for (i=0;i<Le;i++) if (PetscRealPart(vals[i]) < 1.5) marks[cum++] = i+rst; 184 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&vals);CHKERRQ(ierr); 185 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 186 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 187 ierr = MatZeroRows(G,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 188 ierr = PetscFree(marks);CHKERRQ(ierr); 189 190 /* Extract subdomain relevant rows of G */ 191 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 192 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 193 ierr = MatGetSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 194 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 195 ierr = ISDestroy(&lned);CHKERRQ(ierr); 196 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 197 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 198 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 199 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 200 if (print) { 201 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 202 ierr = MatView(lG,NULL);CHKERRQ(ierr); 203 } 204 205 /* SF for nodal communications */ 206 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 207 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 208 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 209 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 210 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 211 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 212 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 213 ierr = PetscMalloc2(nv,&sfvleaves,Lv,&sfvroots);CHKERRQ(ierr); 214 215 /* Destroy temporary G created in MATIS format */ 216 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 217 218 /* Analyze the edge-nodes connections (duplicate lG) */ 219 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGw);CHKERRQ(ierr); 220 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 221 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 222 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 223 /* need to import the boundary specification to ensure the 224 proper detection of coarse edges' endpoints */ 225 if (pcbddc->DirichletBoundariesLocal) { 226 ierr = ISGetLocalSize(pcbddc->DirichletBoundariesLocal,&cum);CHKERRQ(ierr); 227 ierr = ISGetIndices(pcbddc->DirichletBoundariesLocal,&idxs);CHKERRQ(ierr); 228 for (i=0;i<cum;i++) { 229 if (idxs[i] >= 0) { 230 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 231 } 232 } 233 ierr = ISRestoreIndices(pcbddc->DirichletBoundariesLocal,&idxs);CHKERRQ(ierr); 234 } 235 if (pcbddc->NeumannBoundariesLocal) { 236 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&cum);CHKERRQ(ierr); 237 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 238 for (i=0;i<cum;i++) { 239 if (idxs[i] >= 0) { 240 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 241 } 242 } 243 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 244 } 245 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 246 /* need to remove coarse faces' dofs to ensure the 247 proper detection of coarse edges' endpoints */ 248 ierr = PetscCalloc1(ne,&marks);CHKERRQ(ierr); 249 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 250 for (i=1;i<n_neigh;i++) { 251 PetscInt j; 252 for (j=0;j<n_shared[i];j++) marks[shared[i][j]]++; 253 } 254 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 255 cum = 0; 256 ierr = MatGetRowIJ(lGw,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 257 for (i=0;i<ne;i++) { 258 if (!marks[i] || (marks[i] == 1 && !PetscBTLookup(btb,i))) { /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 259 marks[cum++] = i; 260 } else if (!conforming && ii[i+1]-ii[i] != order + 1) { /* set badly connected edge dofs as primal */ 261 marks[cum++] = i; 262 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 263 for (j=ii[i];j<ii[i+1];j++) { 264 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 265 } 266 } 267 } 268 ierr = MatRestoreRowIJ(lGw,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 269 ierr = MatSetOption(lGw,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 270 ierr = MatZeroRows(lGw,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 271 /* identify splitpoints and corner candidates: TODO variable order */ 272 ierr = MatTranspose(lGw,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 273 if (print) { 274 ierr = PetscObjectSetName((PetscObject)lGw,"work_lG");CHKERRQ(ierr); 275 ierr = MatView(lGw,NULL);CHKERRQ(ierr); 276 ierr = PetscObjectSetName((PetscObject)lGt,"work_lGt");CHKERRQ(ierr); 277 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 278 } 279 ierr = MatDestroy(&lGw);CHKERRQ(ierr); 280 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 281 for (i=0;i<nv;i++) { 282 #if defined(PETSC_USE_DEBUG) 283 if ((ii[i+1]-ii[i])%order) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",ii[i+1]-ii[i],i,order); 284 #endif 285 if (ii[i+1]-ii[i] >= 3*order) { /* splitpoints */ 286 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d\n",i); 287 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 288 } else if (ii[i+1]-ii[i] == order) { 289 if (order == 1) { 290 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 291 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 292 } else { 293 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 294 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 295 } 296 } 297 } 298 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 299 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 300 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 301 302 /* Get the local G^T explicitly */ 303 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 304 if (print) { 305 ierr = PetscObjectSetName((PetscObject)lGt,"initial_lGt");CHKERRQ(ierr); 306 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 307 } 308 309 /* Eliminate interior nodal dofs */ 310 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 311 ierr = PetscBTCreate(nv,&iwork);CHKERRQ(ierr); 312 ierr = PetscMalloc1(nv,&interior);CHKERRQ(ierr); 313 for (i=1;i<n_neigh;i++) { 314 for (j=0;j<n_shared[i];j++) { 315 ierr = PetscBTSet(iwork,shared[i][j]);CHKERRQ(ierr); 316 } 317 } 318 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 319 for (i=0,cum=0;i<nv;i++) if (!PetscBTLookup(iwork,i)) interior[cum++] = i; 320 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 321 ierr = MatZeroRows(lGt,cum,interior,0.,NULL,NULL);CHKERRQ(ierr); 322 if (print) { 323 IS tbz; 324 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,interior,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 325 ierr = PetscObjectSetName((PetscObject)tbz,"interior_nodal_dofs");CHKERRQ(ierr); 326 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 327 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 328 } 329 ierr = PetscBTDestroy(&iwork);CHKERRQ(ierr); 330 ierr = PetscFree(interior);CHKERRQ(ierr); 331 332 /* communicate corners and splitpoints */ 333 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 334 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 335 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 336 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 337 338 if (print) { 339 IS tbz; 340 341 cum = 0; 342 for (i=0;i<nv;i++) 343 if (sfvleaves[i]) 344 vmarks[cum++] = i; 345 346 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 347 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 348 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 349 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 350 } 351 352 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 353 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 354 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 355 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 356 357 /* Zero rows of lGt corresponding to identified corners (if any) 358 TODO: this can be merged with the previous zerorows call */ 359 cum = 0; 360 for (i=0;i<nv;i++) { 361 if (sfvleaves[i]) { 362 vmarks[cum++] = i; 363 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 364 } 365 } 366 if (print) { 367 IS tbz; 368 369 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 370 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed");CHKERRQ(ierr); 371 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 372 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 373 } 374 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 375 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 376 ierr = PetscFree(vmarks);CHKERRQ(ierr); 377 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 378 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 379 380 /* Recompute G */ 381 ierr = MatDestroy(&lG);CHKERRQ(ierr); 382 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 383 if (print) { 384 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 385 ierr = MatView(lG,NULL);CHKERRQ(ierr); 386 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 387 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 388 } 389 390 /* Get primal dofs (if any) */ 391 cum = 0; 392 for (i=0;i<ne;i++) { 393 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 394 } 395 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 396 if (print) { 397 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 398 ierr = ISView(primals,NULL);CHKERRQ(ierr); 399 } 400 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 401 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 402 ierr = ISDestroy(&primals);CHKERRQ(ierr); 403 404 /* Compute edge connectivity */ 405 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 406 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 407 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 408 ierr = PCBDDCSetLocalAdjacencyGraph(pc,ne,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 409 410 /* Analyze interface for edge dofs */ 411 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 412 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 413 414 /* Get coarse edges in the edge space */ 415 pcbddc->mat_graph->twodim = PETSC_FALSE; 416 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&eedges,&primals);CHKERRQ(ierr); 417 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 418 419 /* Mark fine edge dofs with their coarse edge id */ 420 maxsize = 0; 421 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 422 for (i=0;i<nee;i++) { 423 PetscInt size,mark = i+1; 424 425 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 426 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 427 for (j=0;j<size;j++) marks[idxs[j]] = mark; 428 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 429 maxsize = PetscMax(maxsize,size); 430 } 431 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 432 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 433 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 434 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 435 if (print) { 436 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 437 ierr = ISView(primals,NULL);CHKERRQ(ierr); 438 } 439 440 /* Find coarse edge endpoints */ 441 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 442 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 443 for (i=0;i<nee;i++) { 444 PetscInt mark = i+1,size; 445 446 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 447 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 448 if (print) { 449 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 450 ISView(eedges[i],NULL); 451 } 452 for (j=0;j<size;j++) { 453 PetscInt k, ee = idxs[j]; 454 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 455 for (k=ii[ee];k<ii[ee+1];k++) { 456 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 457 if (PetscBTLookup(btv,jj[k])) { 458 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 459 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 460 if (print) PetscPrintf(PETSC_COMM_SELF," candidate? %d\n",(PetscBool)(PetscBTLookup(btvcand,jj[k]))); 461 PetscInt k2; 462 PetscBool corner = PETSC_FALSE; 463 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 464 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: count %d, mark %d (ref mark %d), special %d\n",jjt[k2],pcbddc->mat_graph->count[jjt[k2]],marks[jjt[k2]],mark,pcbddc->mat_graph->special_dof[jjt[k2]]); 465 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || 466 pcbddc->mat_graph->special_dof[jjt[k2]] == PCBDDCGRAPH_DIRICHLET_MARK) { 467 corner = PETSC_TRUE; 468 break; 469 } 470 } 471 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 472 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 473 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 474 } else { 475 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 476 } 477 } 478 } 479 } 480 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 481 } 482 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 483 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 484 485 /* Reset marked primal dofs */ 486 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 487 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 488 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 489 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 490 491 /* Compute extended cols indices */ 492 ierr = MatGetRowIJ(lG ,0,PETSC_FALSE,PETSC_FALSE,&i, &ii, &jj,&done);CHKERRQ(ierr); 493 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 494 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 495 i *= maxsize; 496 ierr = PetscMalloc1(nee,&extcols);CHKERRQ(ierr); 497 ierr = PetscCalloc1(nee,&emarks);CHKERRQ(ierr); 498 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 499 eerr = PETSC_FALSE; 500 for (i=0;i<nee;i++) { 501 PetscInt size; 502 503 cum = 0; 504 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 505 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 506 for (j=0;j<size;j++) { 507 PetscInt k,ee = idxs[j]; 508 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 509 } 510 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 511 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 512 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 513 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 514 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 515 /* it may happen that endpoints are not defined at this point 516 if it is the case, mark this edge for a second pass */ 517 if (cum != size -1) { 518 emarks[i] = 1; 519 if (print) { 520 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 521 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 522 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 523 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 524 } 525 eerr = PETSC_TRUE; 526 } 527 } 528 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 529 if (done) { 530 PetscInt *newprimals; 531 532 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 533 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 534 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 535 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 536 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 537 for (i=0;i<nee;i++) { 538 if (emarks[i]) { 539 PetscInt size,mark = i+1; 540 541 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 542 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 543 for (j=0;j<size;j++) { 544 PetscInt k,ee = idxs[j]; 545 for (k=ii[ee];k<ii[ee+1];k++) { 546 /* set all candidates located on the edge as corners */ 547 if (PetscBTLookup(btvcand,jj[k])) { 548 PetscInt k2,vv = jj[k]; 549 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 550 /* set all edge dofs connected to candidate as primals */ 551 for (k2=iit[vv];k2<iit[vv+1];k2++) { 552 if (marks[jjt[k2]] == mark) { 553 PetscInt k3,ee2 = jjt[k2]; 554 newprimals[cum++] = ee2; 555 /* finally set the new corners */ 556 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 557 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 558 } 559 } 560 } 561 } 562 } 563 } 564 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 565 } 566 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 567 } 568 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&eedges,&primals);CHKERRQ(ierr); 569 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 570 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 571 ierr = PetscFree(newprimals);CHKERRQ(ierr); 572 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 573 ierr = ISDestroy(&primals);CHKERRQ(ierr); 574 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 575 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&eedges,&primals);CHKERRQ(ierr); 576 577 /* Mark again */ 578 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 579 for (i=0;i<nee;i++) { 580 PetscInt size,mark = i+1; 581 582 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 583 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 584 for (j=0;j<size;j++) marks[idxs[j]] = mark; 585 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 586 } 587 if (print) { 588 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 589 ierr = ISView(primals,NULL);CHKERRQ(ierr); 590 } 591 592 /* Recompute extended cols */ 593 eerr = PETSC_FALSE; 594 for (i=0;i<nee;i++) { 595 PetscInt size; 596 597 cum = 0; 598 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 599 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 600 for (j=0;j<size;j++) { 601 PetscInt k,ee = idxs[j]; 602 for (k=ii[ee];k<ii[ee+1];k++) { 603 if (!PetscBTLookup(btv,jj[k])) { 604 extrow[cum++] = jj[k]; 605 } 606 } 607 } 608 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 609 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 610 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 611 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 612 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 613 if (cum != size -1) { 614 if (print) { 615 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 616 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 617 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 618 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 619 } 620 eerr = PETSC_TRUE; 621 } 622 } 623 } 624 ierr = MatRestoreRowIJ( lG,0,PETSC_FALSE,PETSC_FALSE,&i, &ii, &jj,&done);CHKERRQ(ierr); 625 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 626 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 627 ierr = PetscFree(emarks);CHKERRQ(ierr); 628 /* an error should not occur at this point */ 629 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 630 631 #if defined(PETSC_USE_DEBUG) 632 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 633 not interfere with neighbouring coarse edges */ 634 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 635 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 636 for (i=0;i<nv;i++) { 637 PetscInt emax = 0,eemax = 0; 638 639 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 640 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 641 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 642 for (j=1;j<nee+1;j++) { 643 if (emax < emarks[j]) { 644 emax = emarks[j]; 645 eemax = j; 646 } 647 } 648 /* not relevant for edges */ 649 if (!eemax) continue; 650 651 for (j=ii[i];j<ii[i+1];j++) { 652 if (marks[jj[j]] && marks[jj[j]] != eemax) { 653 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dod %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 654 } 655 } 656 } 657 ierr = PetscFree(emarks);CHKERRQ(ierr); 658 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 659 #endif 660 661 /* Compute extended rows indices for edge blocks of the change of basis */ 662 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 663 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 664 extmem *= maxsize; 665 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 666 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 667 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 668 for (i=0;i<nv;i++) { 669 PetscInt mark = 0,size,start; 670 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 671 for (j=ii[i];j<ii[i+1];j++) 672 if (marks[jj[j]] && !mark) 673 mark = marks[jj[j]]; 674 675 /* not relevant */ 676 if (!mark) continue; 677 678 /* import extended row */ 679 mark--; 680 start = mark*extmem+extrowcum[mark]; 681 size = ii[i+1]-ii[i]; 682 #if defined(PETSC_USE_DEBUG) 683 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 684 #endif 685 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 686 extrowcum[mark] += size; 687 } 688 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 689 cum = 0; 690 for (i=0;i<nee;i++) { 691 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 692 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 693 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 694 cum = PetscMax(cum,size); 695 } 696 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 697 ierr = PetscFree(marks);CHKERRQ(ierr); 698 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 699 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 700 701 /* Workspace for lapack inner calls and VecSetValues */ 702 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 703 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 704 for (i=0;i<maxsize;i++) vals[i] = 1.; 705 706 /* Create vectors for quadrature rules */ 707 ierr = PetscMalloc1(nquads,&quads);CHKERRQ(ierr); 708 for (i=0;i<nquads;i++) { 709 ierr = MatCreateVecs(pc->pmat,&quads[i],NULL);CHKERRQ(ierr); 710 ierr = VecSetLocalToGlobalMapping(quads[i],el2g);CHKERRQ(ierr); 711 } 712 ierr = PCBDDCNullSpaceCreate(comm,PETSC_FALSE,nquads,quads,&nnsp);CHKERRQ(ierr); 713 714 /* Create change of basis matrix (preallocation can be improved) */ 715 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 716 ierr = MatSetSizes(T,Le,Le,Ne,Ne);CHKERRQ(ierr); 717 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 718 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 719 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 720 ierr = MatSetLocalToGlobalMapping(T,el2g,el2g);CHKERRQ(ierr); 721 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 722 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 723 ierr = MatSetOption(T,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 724 725 /* Defaults to identity */ 726 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 727 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 728 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 729 730 for (i=0;i<nee;i++) { 731 Mat Gins = NULL, GKins = NULL; 732 733 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],&Gins,&GKins,work,rwork);CHKERRQ(ierr); 734 if (Gins && GKins) { 735 PetscScalar *data; 736 const PetscInt *rows,*cols; 737 PetscInt nrh,nch,nrc,ncc; 738 739 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 740 /* H1 */ 741 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 742 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 743 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 744 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 745 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 746 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 747 /* complement */ 748 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 749 if (ncc > nquads-1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet supported ncc %d nquads %d",ncc,nquads); 750 if (ncc + nch != nrc) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d",ncc,nch,nrc); 751 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 752 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 753 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 754 /* Gins kernel quadratures */ 755 for (j=0;j<ncc;j++) { 756 ierr = VecSetValueLocal(quads[j],cols[nch+j],1.,INSERT_VALUES);CHKERRQ(ierr); 757 } 758 /* H1 average */ 759 ierr = VecSetValuesLocal(quads[nquads-1],nch,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 760 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 761 } 762 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 763 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 764 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 765 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 766 } 767 768 /* Start assembling */ 769 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 770 for (i=0;i<nquads;i++) { 771 ierr = VecAssemblyBegin(quads[i]);CHKERRQ(ierr); 772 } 773 774 /* Free */ 775 ierr = PetscFree(extrow);CHKERRQ(ierr); 776 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 777 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 778 ierr = PetscFree(vals);CHKERRQ(ierr); 779 ierr = PetscFree(extrows);CHKERRQ(ierr); 780 ierr = PetscFree(extcols);CHKERRQ(ierr); 781 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&eedges,&primals);CHKERRQ(ierr); 782 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 783 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 784 ierr = MatDestroy(&lG);CHKERRQ(ierr); 785 ierr = MatDestroy(&conn);CHKERRQ(ierr); 786 787 /* Complete assembling */ 788 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 789 for (i=0;i<nquads;i++) { 790 ierr = VecAssemblyEnd(quads[i]);CHKERRQ(ierr); 791 } 792 for (i=0;i<nquads;i++) { 793 ierr = VecDestroy(&quads[i]);CHKERRQ(ierr); 794 } 795 ierr = PetscFree(quads);CHKERRQ(ierr); 796 797 /* tell PCBDDC the topography has been analyzed */ 798 pcbddc->recompute_topography = PETSC_FALSE; 799 800 /* set change of basis */ 801 ierr = PCBDDCSetChangeOfBasisMat(pc,T,PETSC_FALSE);CHKERRQ(ierr); 802 ierr = MatDestroy(&T);CHKERRQ(ierr); 803 804 /* set quadratures */ 805 ierr = MatSetNearNullSpace(pc->pmat,nnsp);CHKERRQ(ierr); 806 ierr = MatNullSpaceDestroy(&nnsp);CHKERRQ(ierr); 807 808 PetscFunctionReturn(0); 809 } 810 811 /* the near-null space of BDDC carries information on quadrature weights, 812 and these can be collinear -> so cheat with MatNullSpaceCreate 813 and create a suitable set of basis vectors first */ 814 #undef __FUNCT__ 815 #define __FUNCT__ "PCBDDCNullSpaceCreate" 816 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 817 { 818 PetscErrorCode ierr; 819 PetscInt i; 820 821 PetscFunctionBegin; 822 for (i=0;i<nvecs;i++) { 823 PetscInt first,last; 824 825 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 826 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 827 if (i>=first && i < last) { 828 PetscScalar *data; 829 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 830 if (!has_const) { 831 data[i-first] = 1.; 832 } else { 833 data[2*i-first] = 1./PetscSqrtReal(2.); 834 data[2*i-first+1] = -1./PetscSqrtReal(2.); 835 } 836 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 837 } 838 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 839 } 840 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 841 for (i=0;i<nvecs;i++) { /* reset vectors */ 842 PetscInt first,last; 843 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 844 if (i>=first && i < last) { 845 PetscScalar *data; 846 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 847 if (!has_const) { 848 data[i-first] = 0.; 849 } else { 850 data[2*i-first] = 0.; 851 data[2*i-first+1] = 0.; 852 } 853 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 854 } 855 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 856 } 857 PetscFunctionReturn(0); 858 } 859 860 #undef __FUNCT__ 861 #define __FUNCT__ "PCBDDCComputeNoNetFlux" 862 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 863 { 864 Mat loc_divudotp; 865 Vec p,v,vins,quad_vec,*quad_vecs; 866 ISLocalToGlobalMapping map; 867 IS *faces,*edges; 868 PetscScalar *vals; 869 const PetscScalar *array; 870 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 871 PetscMPIInt rank; 872 PetscErrorCode ierr; 873 874 PetscFunctionBegin; 875 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 876 if (graph->twodim) { 877 lmaxneighs = 2; 878 } else { 879 lmaxneighs = 1; 880 for (i=0;i<ne;i++) { 881 const PetscInt *idxs; 882 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 883 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 884 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 885 } 886 lmaxneighs++; /* graph count does not include self */ 887 } 888 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 889 maxsize = 0; 890 for (i=0;i<ne;i++) { 891 PetscInt nn; 892 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 893 maxsize = PetscMax(maxsize,nn); 894 } 895 for (i=0;i<nf;i++) { 896 PetscInt nn; 897 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 898 maxsize = PetscMax(maxsize,nn); 899 } 900 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 901 /* create vectors to hold quadrature weights */ 902 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 903 if (!transpose) { 904 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 905 } else { 906 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 907 } 908 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 909 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 910 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 911 for (i=0;i<maxneighs;i++) { 912 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 913 } 914 915 /* compute local quad vec */ 916 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 917 if (!transpose) { 918 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 919 } else { 920 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 921 } 922 ierr = VecSet(p,1.);CHKERRQ(ierr); 923 if (!transpose) { 924 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 925 } else { 926 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 927 } 928 if (vl2l) { 929 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 930 } else { 931 vins = v; 932 } 933 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 934 ierr = VecDestroy(&p);CHKERRQ(ierr); 935 936 /* insert in global quadrature vecs */ 937 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 938 for (i=0;i<nf;i++) { 939 const PetscInt *idxs; 940 PetscInt idx,nn,j; 941 942 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 943 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 944 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 945 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 946 idx = -(idx+1); 947 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 948 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 949 } 950 for (i=0;i<ne;i++) { 951 const PetscInt *idxs; 952 PetscInt idx,nn,j; 953 954 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 955 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 956 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 957 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 958 idx = -(idx+1); 959 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 960 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 961 } 962 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 963 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 964 if (vl2l) { 965 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 966 } 967 ierr = VecDestroy(&v);CHKERRQ(ierr); 968 ierr = PetscFree(vals);CHKERRQ(ierr); 969 970 /* assemble near null space */ 971 for (i=0;i<maxneighs;i++) { 972 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 973 } 974 for (i=0;i<maxneighs;i++) { 975 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 976 } 977 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 978 PetscFunctionReturn(0); 979 } 980 981 982 #undef __FUNCT__ 983 #define __FUNCT__ "PCBDDCComputeLocalTopologyInfo" 984 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 985 { 986 PetscErrorCode ierr; 987 Vec local,global; 988 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 989 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 990 991 PetscFunctionBegin; 992 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 993 /* need to convert from global to local topology information and remove references to information in global ordering */ 994 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 995 if (pcbddc->user_provided_isfordofs) { 996 if (pcbddc->n_ISForDofs) { 997 PetscInt i; 998 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 999 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1000 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1001 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1002 } 1003 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1004 pcbddc->n_ISForDofs = 0; 1005 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1006 } 1007 } else { 1008 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering if bs > 1 */ 1009 PetscInt i, n = matis->A->rmap->n; 1010 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1011 if (i > 1) { 1012 pcbddc->n_ISForDofsLocal = i; 1013 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1014 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1015 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1016 } 1017 } 1018 } 1019 } 1020 1021 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1022 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1023 } 1024 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1025 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1026 } 1027 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1028 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1029 } 1030 ierr = VecDestroy(&global);CHKERRQ(ierr); 1031 ierr = VecDestroy(&local);CHKERRQ(ierr); 1032 PetscFunctionReturn(0); 1033 } 1034 1035 #undef __FUNCT__ 1036 #define __FUNCT__ "PCBDDCBenignRemoveInterior" 1037 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1038 { 1039 PC_IS *pcis = (PC_IS*)(pc->data); 1040 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1041 PetscErrorCode ierr; 1042 1043 PetscFunctionBegin; 1044 if (!pcbddc->benign_have_null) { 1045 PetscFunctionReturn(0); 1046 } 1047 if (pcbddc->ChangeOfBasisMatrix) { 1048 Vec swap; 1049 1050 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1051 swap = pcbddc->work_change; 1052 pcbddc->work_change = r; 1053 r = swap; 1054 } 1055 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1056 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1057 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1058 ierr = VecSet(z,0.);CHKERRQ(ierr); 1059 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1060 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1061 if (pcbddc->ChangeOfBasisMatrix) { 1062 Vec swap; 1063 1064 swap = r; 1065 r = pcbddc->work_change; 1066 pcbddc->work_change = swap; 1067 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1068 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1069 } 1070 PetscFunctionReturn(0); 1071 } 1072 1073 #undef __FUNCT__ 1074 #define __FUNCT__ "PCBDDCBenignMatMult_Private_Private" 1075 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1076 { 1077 PCBDDCBenignMatMult_ctx ctx; 1078 PetscErrorCode ierr; 1079 PetscBool apply_right,apply_left,reset_x; 1080 1081 PetscFunctionBegin; 1082 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1083 if (transpose) { 1084 apply_right = ctx->apply_left; 1085 apply_left = ctx->apply_right; 1086 } else { 1087 apply_right = ctx->apply_right; 1088 apply_left = ctx->apply_left; 1089 } 1090 reset_x = PETSC_FALSE; 1091 if (apply_right) { 1092 const PetscScalar *ax; 1093 PetscInt nl,i; 1094 1095 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1096 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1097 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1098 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1099 for (i=0;i<ctx->benign_n;i++) { 1100 PetscScalar sum,val; 1101 const PetscInt *idxs; 1102 PetscInt nz,j; 1103 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1104 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1105 sum = 0.; 1106 if (ctx->apply_p0) { 1107 val = ctx->work[idxs[nz-1]]; 1108 for (j=0;j<nz-1;j++) { 1109 sum += ctx->work[idxs[j]]; 1110 ctx->work[idxs[j]] += val; 1111 } 1112 } else { 1113 for (j=0;j<nz-1;j++) { 1114 sum += ctx->work[idxs[j]]; 1115 } 1116 } 1117 ctx->work[idxs[nz-1]] -= sum; 1118 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1119 } 1120 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1121 reset_x = PETSC_TRUE; 1122 } 1123 if (transpose) { 1124 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1125 } else { 1126 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1127 } 1128 if (reset_x) { 1129 ierr = VecResetArray(x);CHKERRQ(ierr); 1130 } 1131 if (apply_left) { 1132 PetscScalar *ay; 1133 PetscInt i; 1134 1135 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1136 for (i=0;i<ctx->benign_n;i++) { 1137 PetscScalar sum,val; 1138 const PetscInt *idxs; 1139 PetscInt nz,j; 1140 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1141 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1142 val = -ay[idxs[nz-1]]; 1143 if (ctx->apply_p0) { 1144 sum = 0.; 1145 for (j=0;j<nz-1;j++) { 1146 sum += ay[idxs[j]]; 1147 ay[idxs[j]] += val; 1148 } 1149 ay[idxs[nz-1]] += sum; 1150 } else { 1151 for (j=0;j<nz-1;j++) { 1152 ay[idxs[j]] += val; 1153 } 1154 ay[idxs[nz-1]] = 0.; 1155 } 1156 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1157 } 1158 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1159 } 1160 PetscFunctionReturn(0); 1161 } 1162 1163 #undef __FUNCT__ 1164 #define __FUNCT__ "PCBDDCBenignMatMultTranspose_Private" 1165 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1166 { 1167 PetscErrorCode ierr; 1168 1169 PetscFunctionBegin; 1170 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNCT__ 1175 #define __FUNCT__ "PCBDDCBenignMatMult_Private" 1176 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1177 { 1178 PetscErrorCode ierr; 1179 1180 PetscFunctionBegin; 1181 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1182 PetscFunctionReturn(0); 1183 } 1184 1185 #undef __FUNCT__ 1186 #define __FUNCT__ "PCBDDCBenignShellMat" 1187 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1188 { 1189 PC_IS *pcis = (PC_IS*)pc->data; 1190 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1191 PCBDDCBenignMatMult_ctx ctx; 1192 PetscErrorCode ierr; 1193 1194 PetscFunctionBegin; 1195 if (!restore) { 1196 Mat A_IB,A_BI; 1197 PetscScalar *work; 1198 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1199 1200 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1201 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1202 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1203 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1204 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1205 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1206 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1207 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1208 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1209 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1210 ctx->apply_left = PETSC_TRUE; 1211 ctx->apply_right = PETSC_FALSE; 1212 ctx->apply_p0 = PETSC_FALSE; 1213 ctx->benign_n = pcbddc->benign_n; 1214 if (reuse) { 1215 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1216 ctx->free = PETSC_FALSE; 1217 } else { /* TODO: could be optimized for successive solves */ 1218 ISLocalToGlobalMapping N_to_D; 1219 PetscInt i; 1220 1221 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1222 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1223 for (i=0;i<pcbddc->benign_n;i++) { 1224 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1225 } 1226 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1227 ctx->free = PETSC_TRUE; 1228 } 1229 ctx->A = pcis->A_IB; 1230 ctx->work = work; 1231 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1232 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1233 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1234 pcis->A_IB = A_IB; 1235 1236 /* A_BI as A_IB^T */ 1237 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1238 pcbddc->benign_original_mat = pcis->A_BI; 1239 pcis->A_BI = A_BI; 1240 } else { 1241 if (!pcbddc->benign_original_mat) { 1242 PetscFunctionReturn(0); 1243 } 1244 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1245 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1246 pcis->A_IB = ctx->A; 1247 ctx->A = NULL; 1248 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1249 pcis->A_BI = pcbddc->benign_original_mat; 1250 pcbddc->benign_original_mat = NULL; 1251 if (ctx->free) { 1252 PetscInt i; 1253 for (i=0;i<ctx->benign_n;i++) { 1254 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1255 } 1256 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1257 } 1258 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1259 ierr = PetscFree(ctx);CHKERRQ(ierr); 1260 } 1261 PetscFunctionReturn(0); 1262 } 1263 1264 /* used just in bddc debug mode */ 1265 #undef __FUNCT__ 1266 #define __FUNCT__ "PCBDDCBenignProject" 1267 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1268 { 1269 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1270 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1271 Mat An; 1272 PetscErrorCode ierr; 1273 1274 PetscFunctionBegin; 1275 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1276 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1277 if (is1) { 1278 ierr = MatGetSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1279 ierr = MatDestroy(&An);CHKERRQ(ierr); 1280 } else { 1281 *B = An; 1282 } 1283 PetscFunctionReturn(0); 1284 } 1285 1286 /* TODO: add reuse flag */ 1287 #undef __FUNCT__ 1288 #define __FUNCT__ "MatSeqAIJCompress" 1289 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 1290 { 1291 Mat Bt; 1292 PetscScalar *a,*bdata; 1293 const PetscInt *ii,*ij; 1294 PetscInt m,n,i,nnz,*bii,*bij; 1295 PetscBool flg_row; 1296 PetscErrorCode ierr; 1297 1298 PetscFunctionBegin; 1299 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 1300 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1301 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 1302 nnz = n; 1303 for (i=0;i<ii[n];i++) { 1304 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 1305 } 1306 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 1307 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 1308 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 1309 nnz = 0; 1310 bii[0] = 0; 1311 for (i=0;i<n;i++) { 1312 PetscInt j; 1313 for (j=ii[i];j<ii[i+1];j++) { 1314 PetscScalar entry = a[j]; 1315 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 1316 bij[nnz] = ij[j]; 1317 bdata[nnz] = entry; 1318 nnz++; 1319 } 1320 } 1321 bii[i+1] = nnz; 1322 } 1323 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 1324 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 1325 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 1326 { 1327 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 1328 b->free_a = PETSC_TRUE; 1329 b->free_ij = PETSC_TRUE; 1330 } 1331 *B = Bt; 1332 PetscFunctionReturn(0); 1333 } 1334 1335 #undef __FUNCT__ 1336 #define __FUNCT__ "MatDetectDisconnectedComponents" 1337 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 1338 { 1339 Mat B; 1340 IS is_dummy,*cc_n; 1341 ISLocalToGlobalMapping l2gmap_dummy; 1342 PCBDDCGraph graph; 1343 PetscInt i,n; 1344 PetscInt *xadj,*adjncy; 1345 PetscInt *xadj_filtered,*adjncy_filtered; 1346 PetscBool flg_row,isseqaij; 1347 PetscErrorCode ierr; 1348 1349 PetscFunctionBegin; 1350 if (!A->rmap->N || !A->cmap->N) { 1351 *ncc = 0; 1352 *cc = NULL; 1353 PetscFunctionReturn(0); 1354 } 1355 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1356 if (!isseqaij && filter) { 1357 PetscBool isseqdense; 1358 1359 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 1360 if (!isseqdense) { 1361 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1362 } else { /* TODO: rectangular case and LDA */ 1363 PetscScalar *array; 1364 PetscReal chop=1.e-6; 1365 1366 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 1367 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 1368 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 1369 for (i=0;i<n;i++) { 1370 PetscInt j; 1371 for (j=i+1;j<n;j++) { 1372 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 1373 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 1374 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 1375 } 1376 } 1377 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 1378 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1379 } 1380 } else { 1381 B = A; 1382 } 1383 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1384 1385 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 1386 if (filter) { 1387 PetscScalar *data; 1388 PetscInt j,cum; 1389 1390 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 1391 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 1392 cum = 0; 1393 for (i=0;i<n;i++) { 1394 PetscInt t; 1395 1396 for (j=xadj[i];j<xadj[i+1];j++) { 1397 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 1398 continue; 1399 } 1400 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 1401 } 1402 t = xadj_filtered[i]; 1403 xadj_filtered[i] = cum; 1404 cum += t; 1405 } 1406 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 1407 } else { 1408 xadj_filtered = NULL; 1409 adjncy_filtered = NULL; 1410 } 1411 1412 /* compute local connected components using PCBDDCGraph */ 1413 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 1414 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 1415 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1416 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 1417 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 1418 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 1419 if (xadj_filtered) { 1420 graph->xadj = xadj_filtered; 1421 graph->adjncy = adjncy_filtered; 1422 } else { 1423 graph->xadj = xadj; 1424 graph->adjncy = adjncy; 1425 } 1426 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 1427 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 1428 /* partial clean up */ 1429 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 1430 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 1431 if (A != B) { 1432 ierr = MatDestroy(&B);CHKERRQ(ierr); 1433 } 1434 1435 /* get back data */ 1436 if (ncc) *ncc = graph->ncc; 1437 if (cc) { 1438 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 1439 for (i=0;i<graph->ncc;i++) { 1440 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 1441 } 1442 *cc = cc_n; 1443 } 1444 /* clean up graph */ 1445 graph->xadj = 0; 1446 graph->adjncy = 0; 1447 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 1448 PetscFunctionReturn(0); 1449 } 1450 1451 #undef __FUNCT__ 1452 #define __FUNCT__ "PCBDDCBenignCheck" 1453 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 1454 { 1455 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1456 PC_IS* pcis = (PC_IS*)(pc->data); 1457 IS dirIS = NULL; 1458 PetscInt i; 1459 PetscErrorCode ierr; 1460 1461 PetscFunctionBegin; 1462 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 1463 if (zerodiag) { 1464 Mat A; 1465 Vec vec3_N; 1466 PetscScalar *vals; 1467 const PetscInt *idxs; 1468 PetscInt nz,*count; 1469 1470 /* p0 */ 1471 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 1472 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 1473 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1474 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1475 for (i=0;i<nz;i++) vals[i] = 1.; 1476 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1477 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 1478 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 1479 /* v_I */ 1480 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 1481 for (i=0;i<nz;i++) vals[i] = 0.; 1482 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1483 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1484 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1485 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 1486 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1487 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1488 if (dirIS) { 1489 PetscInt n; 1490 1491 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 1492 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 1493 for (i=0;i<n;i++) vals[i] = 0.; 1494 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1495 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 1496 } 1497 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 1498 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 1499 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 1500 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 1501 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 1502 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 1503 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 1504 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 1505 ierr = PetscFree(vals);CHKERRQ(ierr); 1506 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 1507 1508 /* there should not be any pressure dofs lying on the interface */ 1509 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 1510 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1511 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 1512 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 1513 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1514 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 1515 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1516 ierr = PetscFree(count);CHKERRQ(ierr); 1517 } 1518 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 1519 1520 /* check PCBDDCBenignGetOrSetP0 */ 1521 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 1522 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 1523 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 1524 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 1525 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 1526 for (i=0;i<pcbddc->benign_n;i++) { 1527 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 1528 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 1529 } 1530 PetscFunctionReturn(0); 1531 } 1532 1533 #undef __FUNCT__ 1534 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 1535 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 1536 { 1537 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1538 IS pressures,zerodiag,*zerodiag_subs; 1539 PetscInt nz,n; 1540 PetscInt *interior_dofs,n_interior_dofs; 1541 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 1542 PetscErrorCode ierr; 1543 1544 PetscFunctionBegin; 1545 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1546 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1547 for (n=0;n<pcbddc->benign_n;n++) { 1548 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 1549 } 1550 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1551 pcbddc->benign_n = 0; 1552 /* if a local info on dofs is present, assumes that the last field represents "pressures" 1553 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 1554 Checks if all the pressure dofs in each subdomain have a zero diagonal 1555 If not, a change of basis on pressures is not needed 1556 since the local Schur complements are already SPD 1557 */ 1558 has_null_pressures = PETSC_TRUE; 1559 have_null = PETSC_TRUE; 1560 if (pcbddc->n_ISForDofsLocal) { 1561 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 1562 1563 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 1564 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 1565 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1566 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 1567 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 1568 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 1569 if (!sorted) { 1570 ierr = ISSort(pressures);CHKERRQ(ierr); 1571 } 1572 } else { 1573 pressures = NULL; 1574 } 1575 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 1576 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1577 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 1578 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 1579 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 1580 if (!sorted) { 1581 ierr = ISSort(zerodiag);CHKERRQ(ierr); 1582 } 1583 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1584 if (!nz) { 1585 if (n) have_null = PETSC_FALSE; 1586 has_null_pressures = PETSC_FALSE; 1587 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1588 } 1589 recompute_zerodiag = PETSC_FALSE; 1590 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 1591 zerodiag_subs = NULL; 1592 pcbddc->benign_n = 0; 1593 n_interior_dofs = 0; 1594 interior_dofs = NULL; 1595 if (pcbddc->current_level) { /* need to compute interior nodes */ 1596 PetscInt n,i,j; 1597 PetscInt n_neigh,*neigh,*n_shared,**shared; 1598 PetscInt *iwork; 1599 1600 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 1601 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1602 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 1603 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 1604 for (i=1;i<n_neigh;i++) 1605 for (j=0;j<n_shared[i];j++) 1606 iwork[shared[i][j]] += 1; 1607 for (i=0;i<n;i++) 1608 if (!iwork[i]) 1609 interior_dofs[n_interior_dofs++] = i; 1610 ierr = PetscFree(iwork);CHKERRQ(ierr); 1611 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1612 } 1613 if (has_null_pressures) { 1614 IS *subs; 1615 PetscInt nsubs,i,j,nl; 1616 const PetscInt *idxs; 1617 PetscScalar *array; 1618 Vec *work; 1619 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 1620 1621 subs = pcbddc->local_subs; 1622 nsubs = pcbddc->n_local_subs; 1623 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 1624 if (pcbddc->current_level) { 1625 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 1626 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 1627 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 1628 /* work[0] = 1_p */ 1629 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 1630 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 1631 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1632 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 1633 /* work[0] = 1_v */ 1634 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 1635 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 1636 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 1637 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 1638 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 1639 } 1640 if (nsubs > 1) { 1641 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 1642 for (i=0;i<nsubs;i++) { 1643 ISLocalToGlobalMapping l2g; 1644 IS t_zerodiag_subs; 1645 PetscInt nl; 1646 1647 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 1648 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 1649 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 1650 if (nl) { 1651 PetscBool valid = PETSC_TRUE; 1652 1653 if (pcbddc->current_level) { 1654 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 1655 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 1656 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 1657 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 1658 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 1659 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 1660 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 1661 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 1662 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 1663 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 1664 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 1665 for (j=0;j<n_interior_dofs;j++) { 1666 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 1667 valid = PETSC_FALSE; 1668 break; 1669 } 1670 } 1671 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 1672 } 1673 if (valid && pcbddc->NeumannBoundariesLocal) { 1674 IS t_bc; 1675 PetscInt nzb; 1676 1677 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pcbddc->NeumannBoundariesLocal,&t_bc);CHKERRQ(ierr); 1678 ierr = ISGetLocalSize(t_bc,&nzb);CHKERRQ(ierr); 1679 ierr = ISDestroy(&t_bc);CHKERRQ(ierr); 1680 if (nzb) valid = PETSC_FALSE; 1681 } 1682 if (valid && pressures) { 1683 IS t_pressure_subs; 1684 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 1685 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 1686 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 1687 } 1688 if (valid) { 1689 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 1690 pcbddc->benign_n++; 1691 } else { 1692 recompute_zerodiag = PETSC_TRUE; 1693 } 1694 } 1695 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 1696 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 1697 } 1698 } else { /* there's just one subdomain (or zero if they have not been detected */ 1699 PetscBool valid = PETSC_TRUE; 1700 1701 if (pcbddc->NeumannBoundariesLocal) { 1702 PetscInt nzb; 1703 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nzb);CHKERRQ(ierr); 1704 if (nzb) valid = PETSC_FALSE; 1705 } 1706 if (valid && pressures) { 1707 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 1708 } 1709 if (valid && pcbddc->current_level) { 1710 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 1711 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 1712 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 1713 for (j=0;j<n_interior_dofs;j++) { 1714 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 1715 valid = PETSC_FALSE; 1716 break; 1717 } 1718 } 1719 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 1720 } 1721 if (valid) { 1722 pcbddc->benign_n = 1; 1723 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 1724 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 1725 zerodiag_subs[0] = zerodiag; 1726 } 1727 } 1728 if (pcbddc->current_level) { 1729 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 1730 } 1731 } 1732 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 1733 1734 if (!pcbddc->benign_n) { 1735 PetscInt n; 1736 1737 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1738 recompute_zerodiag = PETSC_FALSE; 1739 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 1740 if (n) { 1741 has_null_pressures = PETSC_FALSE; 1742 have_null = PETSC_FALSE; 1743 } 1744 } 1745 1746 /* final check for null pressures */ 1747 if (zerodiag && pressures) { 1748 PetscInt nz,np; 1749 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1750 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 1751 if (nz != np) have_null = PETSC_FALSE; 1752 } 1753 1754 if (recompute_zerodiag) { 1755 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 1756 if (pcbddc->benign_n == 1) { 1757 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 1758 zerodiag = zerodiag_subs[0]; 1759 } else { 1760 PetscInt i,nzn,*new_idxs; 1761 1762 nzn = 0; 1763 for (i=0;i<pcbddc->benign_n;i++) { 1764 PetscInt ns; 1765 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 1766 nzn += ns; 1767 } 1768 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 1769 nzn = 0; 1770 for (i=0;i<pcbddc->benign_n;i++) { 1771 PetscInt ns,*idxs; 1772 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 1773 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1774 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 1775 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 1776 nzn += ns; 1777 } 1778 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 1779 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 1780 } 1781 have_null = PETSC_FALSE; 1782 } 1783 1784 /* Prepare matrix to compute no-net-flux */ 1785 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 1786 Mat A,loc_divudotp; 1787 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 1788 IS row,col,isused = NULL; 1789 PetscInt M,N,n,st,n_isused; 1790 1791 if (pressures) { 1792 isused = pressures; 1793 } else { 1794 isused = zerodiag; 1795 } 1796 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 1797 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 1798 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 1799 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 1800 n_isused = 0; 1801 if (isused) { 1802 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 1803 } 1804 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1805 st = st-n_isused; 1806 if (n) { 1807 const PetscInt *gidxs; 1808 1809 ierr = MatGetSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 1810 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 1811 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 1812 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 1813 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 1814 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 1815 } else { 1816 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 1817 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 1818 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 1819 } 1820 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 1821 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 1822 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 1823 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 1824 ierr = ISDestroy(&row);CHKERRQ(ierr); 1825 ierr = ISDestroy(&col);CHKERRQ(ierr); 1826 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 1827 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 1828 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 1829 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 1830 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 1831 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 1832 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 1833 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 1834 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1835 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1836 } 1837 1838 /* change of basis and p0 dofs */ 1839 if (has_null_pressures) { 1840 IS zerodiagc; 1841 const PetscInt *idxs,*idxsc; 1842 PetscInt i,s,*nnz; 1843 1844 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 1845 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 1846 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1847 /* local change of basis for pressures */ 1848 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1849 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 1850 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 1851 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1852 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 1853 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 1854 for (i=0;i<pcbddc->benign_n;i++) { 1855 PetscInt nzs,j; 1856 1857 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 1858 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1859 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 1860 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 1861 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1862 } 1863 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 1864 ierr = PetscFree(nnz);CHKERRQ(ierr); 1865 /* set identity on velocities */ 1866 for (i=0;i<n-nz;i++) { 1867 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 1868 } 1869 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 1870 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 1871 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1872 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 1873 /* set change on pressures */ 1874 for (s=0;s<pcbddc->benign_n;s++) { 1875 PetscScalar *array; 1876 PetscInt nzs; 1877 1878 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 1879 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1880 for (i=0;i<nzs-1;i++) { 1881 PetscScalar vals[2]; 1882 PetscInt cols[2]; 1883 1884 cols[0] = idxs[i]; 1885 cols[1] = idxs[nzs-1]; 1886 vals[0] = 1.; 1887 vals[1] = 1.; 1888 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 1889 } 1890 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 1891 for (i=0;i<nzs-1;i++) array[i] = -1.; 1892 array[nzs-1] = 1.; 1893 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 1894 /* store local idxs for p0 */ 1895 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 1896 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 1897 ierr = PetscFree(array);CHKERRQ(ierr); 1898 } 1899 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1900 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1901 /* project if needed */ 1902 if (pcbddc->benign_change_explicit) { 1903 Mat M; 1904 1905 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 1906 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1907 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 1908 ierr = MatDestroy(&M);CHKERRQ(ierr); 1909 } 1910 /* store global idxs for p0 */ 1911 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1912 } 1913 pcbddc->benign_zerodiag_subs = zerodiag_subs; 1914 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 1915 1916 /* determines if the coarse solver will be singular or not */ 1917 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1918 /* determines if the problem has subdomains with 0 pressure block */ 1919 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1920 *zerodiaglocal = zerodiag; 1921 PetscFunctionReturn(0); 1922 } 1923 1924 #undef __FUNCT__ 1925 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 1926 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 1927 { 1928 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1929 PetscScalar *array; 1930 PetscErrorCode ierr; 1931 1932 PetscFunctionBegin; 1933 if (!pcbddc->benign_sf) { 1934 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 1935 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 1936 } 1937 if (get) { 1938 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1939 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1940 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 1941 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 1942 } else { 1943 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 1944 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1945 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 1946 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 1947 } 1948 PetscFunctionReturn(0); 1949 } 1950 1951 #undef __FUNCT__ 1952 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 1953 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 1954 { 1955 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1956 PetscErrorCode ierr; 1957 1958 PetscFunctionBegin; 1959 /* TODO: add error checking 1960 - avoid nested pop (or push) calls. 1961 - cannot push before pop. 1962 - cannot call this if pcbddc->local_mat is NULL 1963 */ 1964 if (!pcbddc->benign_n) { 1965 PetscFunctionReturn(0); 1966 } 1967 if (pop) { 1968 if (pcbddc->benign_change_explicit) { 1969 IS is_p0; 1970 MatReuse reuse; 1971 1972 /* extract B_0 */ 1973 reuse = MAT_INITIAL_MATRIX; 1974 if (pcbddc->benign_B0) { 1975 reuse = MAT_REUSE_MATRIX; 1976 } 1977 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 1978 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 1979 /* remove rows and cols from local problem */ 1980 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 1981 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1982 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 1983 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 1984 } else { 1985 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1986 PetscScalar *vals; 1987 PetscInt i,n,*idxs_ins; 1988 1989 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 1990 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 1991 if (!pcbddc->benign_B0) { 1992 PetscInt *nnz; 1993 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 1994 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 1995 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1996 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 1997 for (i=0;i<pcbddc->benign_n;i++) { 1998 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 1999 nnz[i] = n - nnz[i]; 2000 } 2001 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2002 ierr = PetscFree(nnz);CHKERRQ(ierr); 2003 } 2004 2005 for (i=0;i<pcbddc->benign_n;i++) { 2006 PetscScalar *array; 2007 PetscInt *idxs,j,nz,cum; 2008 2009 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2010 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2011 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2012 for (j=0;j<nz;j++) vals[j] = 1.; 2013 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2014 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2015 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2016 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2017 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2018 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2019 cum = 0; 2020 for (j=0;j<n;j++) { 2021 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2022 vals[cum] = array[j]; 2023 idxs_ins[cum] = j; 2024 cum++; 2025 } 2026 } 2027 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2028 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2029 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2030 } 2031 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2032 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2033 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2034 } 2035 } else { /* push */ 2036 if (pcbddc->benign_change_explicit) { 2037 PetscInt i; 2038 2039 for (i=0;i<pcbddc->benign_n;i++) { 2040 PetscScalar *B0_vals; 2041 PetscInt *B0_cols,B0_ncol; 2042 2043 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2044 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2045 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2046 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2047 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2048 } 2049 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2050 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2051 } else { 2052 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2053 } 2054 } 2055 PetscFunctionReturn(0); 2056 } 2057 2058 #undef __FUNCT__ 2059 #define __FUNCT__ "PCBDDCAdaptiveSelection" 2060 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2061 { 2062 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2063 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2064 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2065 PetscBLASInt *B_iwork,*B_ifail; 2066 PetscScalar *work,lwork; 2067 PetscScalar *St,*S,*eigv; 2068 PetscScalar *Sarray,*Starray; 2069 PetscReal *eigs,thresh; 2070 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2071 PetscBool allocated_S_St; 2072 #if defined(PETSC_USE_COMPLEX) 2073 PetscReal *rwork; 2074 #endif 2075 PetscErrorCode ierr; 2076 2077 PetscFunctionBegin; 2078 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2079 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2080 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2081 2082 if (pcbddc->dbg_flag) { 2083 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2084 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2085 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2086 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2087 } 2088 2089 if (pcbddc->dbg_flag) { 2090 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2091 } 2092 2093 /* max size of subsets */ 2094 mss = 0; 2095 for (i=0;i<sub_schurs->n_subs;i++) { 2096 PetscInt subset_size; 2097 2098 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2099 mss = PetscMax(mss,subset_size); 2100 } 2101 2102 /* min/max and threshold */ 2103 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2104 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2105 nmax = PetscMax(nmin,nmax); 2106 allocated_S_St = PETSC_FALSE; 2107 if (nmin) { 2108 allocated_S_St = PETSC_TRUE; 2109 } 2110 2111 /* allocate lapack workspace */ 2112 cum = cum2 = 0; 2113 maxneigs = 0; 2114 for (i=0;i<sub_schurs->n_subs;i++) { 2115 PetscInt n,subset_size; 2116 2117 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2118 n = PetscMin(subset_size,nmax); 2119 cum += subset_size; 2120 cum2 += subset_size*n; 2121 maxneigs = PetscMax(maxneigs,n); 2122 } 2123 if (mss) { 2124 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2125 PetscBLASInt B_itype = 1; 2126 PetscBLASInt B_N = mss; 2127 PetscReal zero = 0.0; 2128 PetscReal eps = 0.0; /* dlamch? */ 2129 2130 B_lwork = -1; 2131 S = NULL; 2132 St = NULL; 2133 eigs = NULL; 2134 eigv = NULL; 2135 B_iwork = NULL; 2136 B_ifail = NULL; 2137 #if defined(PETSC_USE_COMPLEX) 2138 rwork = NULL; 2139 #endif 2140 thresh = 1.0; 2141 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2142 #if defined(PETSC_USE_COMPLEX) 2143 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2144 #else 2145 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2146 #endif 2147 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 2148 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2149 } else { 2150 /* TODO */ 2151 } 2152 } else { 2153 lwork = 0; 2154 } 2155 2156 nv = 0; 2157 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 2158 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 2159 } 2160 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 2161 if (allocated_S_St) { 2162 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 2163 } 2164 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 2165 #if defined(PETSC_USE_COMPLEX) 2166 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 2167 #endif 2168 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 2169 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 2170 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 2171 nv+cum,&pcbddc->adaptive_constraints_idxs, 2172 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2173 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 2174 2175 maxneigs = 0; 2176 cum = cumarray = 0; 2177 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 2178 pcbddc->adaptive_constraints_data_ptr[0] = 0; 2179 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2180 const PetscInt *idxs; 2181 2182 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2183 for (cum=0;cum<nv;cum++) { 2184 pcbddc->adaptive_constraints_n[cum] = 1; 2185 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 2186 pcbddc->adaptive_constraints_data[cum] = 1.0; 2187 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 2188 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 2189 } 2190 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 2191 } 2192 2193 if (mss) { /* multilevel */ 2194 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2195 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2196 } 2197 2198 thresh = pcbddc->adaptive_threshold; 2199 for (i=0;i<sub_schurs->n_subs;i++) { 2200 const PetscInt *idxs; 2201 PetscReal upper,lower; 2202 PetscInt j,subset_size,eigs_start = 0; 2203 PetscBLASInt B_N; 2204 PetscBool same_data = PETSC_FALSE; 2205 2206 if (pcbddc->use_deluxe_scaling) { 2207 upper = PETSC_MAX_REAL; 2208 lower = thresh; 2209 } else { 2210 upper = 1./thresh; 2211 lower = 0.; 2212 } 2213 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2214 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2215 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 2216 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 2217 if (sub_schurs->is_hermitian) { 2218 PetscInt j,k; 2219 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 2220 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2221 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2222 } 2223 for (j=0;j<subset_size;j++) { 2224 for (k=j;k<subset_size;k++) { 2225 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2226 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2227 } 2228 } 2229 } else { 2230 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2231 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2232 } 2233 } else { 2234 S = Sarray + cumarray; 2235 St = Starray + cumarray; 2236 } 2237 /* see if we can save some work */ 2238 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 2239 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 2240 } 2241 2242 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 2243 B_neigs = 0; 2244 } else { 2245 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2246 PetscBLASInt B_itype = 1; 2247 PetscBLASInt B_IL, B_IU; 2248 PetscReal eps = -1.0; /* dlamch? */ 2249 PetscInt nmin_s; 2250 PetscBool compute_range = PETSC_FALSE; 2251 2252 if (pcbddc->dbg_flag) { 2253 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 2254 } 2255 2256 compute_range = PETSC_FALSE; 2257 if (thresh > 1.+PETSC_SMALL && !same_data) { 2258 compute_range = PETSC_TRUE; 2259 } 2260 2261 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2262 if (compute_range) { 2263 2264 /* ask for eigenvalues larger than thresh */ 2265 #if defined(PETSC_USE_COMPLEX) 2266 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2267 #else 2268 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2269 #endif 2270 } else if (!same_data) { 2271 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 2272 B_IL = 1; 2273 #if defined(PETSC_USE_COMPLEX) 2274 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2275 #else 2276 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2277 #endif 2278 } else { /* same_data is true, so get the adaptive function requested by the user */ 2279 PetscInt k; 2280 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 2281 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 2282 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 2283 nmin = nmax; 2284 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 2285 for (k=0;k<nmax;k++) { 2286 eigs[k] = 1./PETSC_SMALL; 2287 eigv[k*(subset_size+1)] = 1.0; 2288 } 2289 } 2290 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2291 if (B_ierr) { 2292 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2293 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2294 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2295 } 2296 2297 if (B_neigs > nmax) { 2298 if (pcbddc->dbg_flag) { 2299 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 2300 } 2301 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 2302 B_neigs = nmax; 2303 } 2304 2305 nmin_s = PetscMin(nmin,B_N); 2306 if (B_neigs < nmin_s) { 2307 PetscBLASInt B_neigs2; 2308 2309 if (pcbddc->use_deluxe_scaling) { 2310 B_IL = B_N - nmin_s + 1; 2311 B_IU = B_N - B_neigs; 2312 } else { 2313 B_IL = B_neigs + 1; 2314 B_IU = nmin_s; 2315 } 2316 if (pcbddc->dbg_flag) { 2317 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 2318 } 2319 if (sub_schurs->is_hermitian) { 2320 PetscInt j,k; 2321 for (j=0;j<subset_size;j++) { 2322 for (k=j;k<subset_size;k++) { 2323 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 2324 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 2325 } 2326 } 2327 } else { 2328 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2329 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2330 } 2331 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2332 #if defined(PETSC_USE_COMPLEX) 2333 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 2334 #else 2335 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 2336 #endif 2337 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2338 B_neigs += B_neigs2; 2339 } 2340 if (B_ierr) { 2341 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 2342 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 2343 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 2344 } 2345 if (pcbddc->dbg_flag) { 2346 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 2347 for (j=0;j<B_neigs;j++) { 2348 if (eigs[j] == 0.0) { 2349 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 2350 } else { 2351 if (pcbddc->use_deluxe_scaling) { 2352 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 2353 } else { 2354 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 2355 } 2356 } 2357 } 2358 } 2359 } else { 2360 /* TODO */ 2361 } 2362 } 2363 /* change the basis back to the original one */ 2364 if (sub_schurs->change) { 2365 Mat change,phi,phit; 2366 2367 if (pcbddc->dbg_flag > 1) { 2368 PetscInt ii; 2369 for (ii=0;ii<B_neigs;ii++) { 2370 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2371 for (j=0;j<B_N;j++) { 2372 #if defined(PETSC_USE_COMPLEX) 2373 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 2374 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 2375 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2376 #else 2377 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 2378 #endif 2379 } 2380 } 2381 } 2382 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 2383 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 2384 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 2385 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 2386 ierr = MatDestroy(&phit);CHKERRQ(ierr); 2387 ierr = MatDestroy(&phi);CHKERRQ(ierr); 2388 } 2389 maxneigs = PetscMax(B_neigs,maxneigs); 2390 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 2391 if (B_neigs) { 2392 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 2393 2394 if (pcbddc->dbg_flag > 1) { 2395 PetscInt ii; 2396 for (ii=0;ii<B_neigs;ii++) { 2397 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 2398 for (j=0;j<B_N;j++) { 2399 #if defined(PETSC_USE_COMPLEX) 2400 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2401 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 2402 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 2403 #else 2404 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 2405 #endif 2406 } 2407 } 2408 } 2409 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 2410 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 2411 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 2412 cum++; 2413 } 2414 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 2415 /* shift for next computation */ 2416 cumarray += subset_size*subset_size; 2417 } 2418 if (pcbddc->dbg_flag) { 2419 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2420 } 2421 2422 if (mss) { 2423 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 2424 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 2425 /* destroy matrices (junk) */ 2426 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 2427 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 2428 } 2429 if (allocated_S_St) { 2430 ierr = PetscFree2(S,St);CHKERRQ(ierr); 2431 } 2432 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 2433 #if defined(PETSC_USE_COMPLEX) 2434 ierr = PetscFree(rwork);CHKERRQ(ierr); 2435 #endif 2436 if (pcbddc->dbg_flag) { 2437 PetscInt maxneigs_r; 2438 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2439 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 2440 } 2441 PetscFunctionReturn(0); 2442 } 2443 2444 #undef __FUNCT__ 2445 #define __FUNCT__ "PCBDDCSetUpSolvers" 2446 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 2447 { 2448 PetscScalar *coarse_submat_vals; 2449 PetscErrorCode ierr; 2450 2451 PetscFunctionBegin; 2452 /* Setup local scatters R_to_B and (optionally) R_to_D */ 2453 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 2454 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 2455 2456 /* Setup local neumann solver ksp_R */ 2457 /* PCBDDCSetUpLocalScatters should be called first! */ 2458 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 2459 2460 /* 2461 Setup local correction and local part of coarse basis. 2462 Gives back the dense local part of the coarse matrix in column major ordering 2463 */ 2464 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 2465 2466 /* Compute total number of coarse nodes and setup coarse solver */ 2467 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 2468 2469 /* free */ 2470 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 2471 PetscFunctionReturn(0); 2472 } 2473 2474 #undef __FUNCT__ 2475 #define __FUNCT__ "PCBDDCResetCustomization" 2476 PetscErrorCode PCBDDCResetCustomization(PC pc) 2477 { 2478 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2479 PetscErrorCode ierr; 2480 2481 PetscFunctionBegin; 2482 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 2483 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 2484 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 2485 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2486 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 2487 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2488 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2489 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2490 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 2491 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 2492 PetscFunctionReturn(0); 2493 } 2494 2495 #undef __FUNCT__ 2496 #define __FUNCT__ "PCBDDCResetTopography" 2497 PetscErrorCode PCBDDCResetTopography(PC pc) 2498 { 2499 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2500 PetscInt i; 2501 PetscErrorCode ierr; 2502 2503 PetscFunctionBegin; 2504 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 2505 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2506 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2507 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 2508 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 2509 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2510 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 2511 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 2512 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2513 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2514 pcbddc->graphanalyzed = PETSC_FALSE; 2515 for (i=0;i<pcbddc->n_local_subs;i++) { 2516 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 2517 } 2518 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 2519 if (pcbddc->sub_schurs) { 2520 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 2521 } 2522 PetscFunctionReturn(0); 2523 } 2524 2525 #undef __FUNCT__ 2526 #define __FUNCT__ "PCBDDCResetSolvers" 2527 PetscErrorCode PCBDDCResetSolvers(PC pc) 2528 { 2529 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2530 PetscErrorCode ierr; 2531 2532 PetscFunctionBegin; 2533 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 2534 if (pcbddc->coarse_phi_B) { 2535 PetscScalar *array; 2536 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 2537 ierr = PetscFree(array);CHKERRQ(ierr); 2538 } 2539 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2540 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2541 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2542 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2543 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2544 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2545 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2546 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2547 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2548 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2549 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2550 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2551 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2552 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 2553 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 2554 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 2555 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 2556 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2557 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2558 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2559 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 2560 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 2561 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2562 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 2563 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 2564 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2565 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2566 if (pcbddc->benign_zerodiag_subs) { 2567 PetscInt i; 2568 for (i=0;i<pcbddc->benign_n;i++) { 2569 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2570 } 2571 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2572 } 2573 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2574 PetscFunctionReturn(0); 2575 } 2576 2577 #undef __FUNCT__ 2578 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 2579 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 2580 { 2581 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2582 PC_IS *pcis = (PC_IS*)pc->data; 2583 VecType impVecType; 2584 PetscInt n_constraints,n_R,old_size; 2585 PetscErrorCode ierr; 2586 2587 PetscFunctionBegin; 2588 if (!pcbddc->ConstraintMatrix) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 2589 /* get sizes */ 2590 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 2591 n_R = pcis->n - pcbddc->n_vertices; 2592 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 2593 /* local work vectors (try to avoid unneeded work)*/ 2594 /* R nodes */ 2595 old_size = -1; 2596 if (pcbddc->vec1_R) { 2597 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 2598 } 2599 if (n_R != old_size) { 2600 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 2601 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 2602 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 2603 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 2604 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 2605 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 2606 } 2607 /* local primal dofs */ 2608 old_size = -1; 2609 if (pcbddc->vec1_P) { 2610 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 2611 } 2612 if (pcbddc->local_primal_size != old_size) { 2613 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 2614 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 2615 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 2616 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 2617 } 2618 /* local explicit constraints */ 2619 old_size = -1; 2620 if (pcbddc->vec1_C) { 2621 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 2622 } 2623 if (n_constraints && n_constraints != old_size) { 2624 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 2625 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 2626 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 2627 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 2628 } 2629 PetscFunctionReturn(0); 2630 } 2631 2632 #undef __FUNCT__ 2633 #define __FUNCT__ "PCBDDCSetUpCorrection" 2634 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 2635 { 2636 PetscErrorCode ierr; 2637 /* pointers to pcis and pcbddc */ 2638 PC_IS* pcis = (PC_IS*)pc->data; 2639 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2640 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2641 /* submatrices of local problem */ 2642 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 2643 /* submatrices of local coarse problem */ 2644 Mat S_VV,S_CV,S_VC,S_CC; 2645 /* working matrices */ 2646 Mat C_CR; 2647 /* additional working stuff */ 2648 PC pc_R; 2649 Mat F; 2650 Vec dummy_vec; 2651 PetscBool isLU,isCHOL,isILU,need_benign_correction; 2652 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 2653 PetscScalar *work; 2654 PetscInt *idx_V_B; 2655 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 2656 PetscInt i,n_R,n_D,n_B; 2657 2658 /* some shortcuts to scalars */ 2659 PetscScalar one=1.0,m_one=-1.0; 2660 2661 PetscFunctionBegin; 2662 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 2663 2664 /* Set Non-overlapping dimensions */ 2665 n_vertices = pcbddc->n_vertices; 2666 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 2667 n_B = pcis->n_B; 2668 n_D = pcis->n - n_B; 2669 n_R = pcis->n - n_vertices; 2670 2671 /* vertices in boundary numbering */ 2672 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 2673 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 2674 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 2675 2676 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 2677 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 2678 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 2679 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 2680 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 2681 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 2682 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 2683 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 2684 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 2685 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 2686 2687 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 2688 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 2689 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 2690 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 2691 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 2692 lda_rhs = n_R; 2693 need_benign_correction = PETSC_FALSE; 2694 if (isLU || isILU || isCHOL) { 2695 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 2696 } else if (sub_schurs && sub_schurs->reuse_solver) { 2697 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2698 MatFactorType type; 2699 2700 F = reuse_solver->F; 2701 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 2702 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 2703 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 2704 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 2705 } else { 2706 F = NULL; 2707 } 2708 2709 /* allocate workspace */ 2710 n = 0; 2711 if (n_constraints) { 2712 n += lda_rhs*n_constraints; 2713 } 2714 if (n_vertices) { 2715 n = PetscMax(2*lda_rhs*n_vertices,n); 2716 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 2717 } 2718 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 2719 2720 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 2721 dummy_vec = NULL; 2722 if (need_benign_correction && lda_rhs != n_R && F) { 2723 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 2724 } 2725 2726 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 2727 if (n_constraints) { 2728 Mat M1,M2,M3,C_B; 2729 IS is_aux; 2730 PetscScalar *array,*array2; 2731 2732 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 2733 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 2734 2735 /* Extract constraints on R nodes: C_{CR} */ 2736 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 2737 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 2738 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 2739 2740 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 2741 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 2742 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2743 for (i=0;i<n_constraints;i++) { 2744 const PetscScalar *row_cmat_values; 2745 const PetscInt *row_cmat_indices; 2746 PetscInt size_of_constraint,j; 2747 2748 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 2749 for (j=0;j<size_of_constraint;j++) { 2750 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 2751 } 2752 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 2753 } 2754 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 2755 if (F) { 2756 Mat B; 2757 2758 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 2759 if (need_benign_correction) { 2760 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2761 2762 /* rhs is already zero on interior dofs, no need to change the rhs */ 2763 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 2764 } 2765 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 2766 if (need_benign_correction) { 2767 PetscScalar *marr; 2768 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2769 2770 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 2771 if (lda_rhs != n_R) { 2772 for (i=0;i<n_constraints;i++) { 2773 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 2774 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2775 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 2776 } 2777 } else { 2778 for (i=0;i<n_constraints;i++) { 2779 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 2780 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 2781 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2782 } 2783 } 2784 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 2785 } 2786 ierr = MatDestroy(&B);CHKERRQ(ierr); 2787 } else { 2788 PetscScalar *marr; 2789 2790 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 2791 for (i=0;i<n_constraints;i++) { 2792 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 2793 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 2794 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2795 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2796 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 2797 } 2798 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 2799 } 2800 if (!pcbddc->switch_static) { 2801 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 2802 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 2803 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 2804 for (i=0;i<n_constraints;i++) { 2805 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 2806 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 2807 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2808 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2809 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 2810 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 2811 } 2812 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 2813 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 2814 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 2815 } else { 2816 if (lda_rhs != n_R) { 2817 IS dummy; 2818 2819 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 2820 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 2821 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 2822 } else { 2823 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 2824 pcbddc->local_auxmat2 = local_auxmat2_R; 2825 } 2826 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 2827 } 2828 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 2829 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 2830 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 2831 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 2832 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 2833 if (isCHOL) { 2834 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 2835 } else { 2836 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 2837 } 2838 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 2839 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 2840 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 2841 ierr = MatDestroy(&M2);CHKERRQ(ierr); 2842 ierr = MatDestroy(&M3);CHKERRQ(ierr); 2843 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 2844 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 2845 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 2846 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 2847 ierr = MatDestroy(&M1);CHKERRQ(ierr); 2848 } 2849 2850 /* Get submatrices from subdomain matrix */ 2851 if (n_vertices) { 2852 IS is_aux; 2853 2854 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 2855 IS tis; 2856 2857 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 2858 ierr = ISSort(tis);CHKERRQ(ierr); 2859 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 2860 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2861 } else { 2862 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 2863 } 2864 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 2865 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 2866 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 2867 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 2868 } 2869 2870 /* Matrix of coarse basis functions (local) */ 2871 if (pcbddc->coarse_phi_B) { 2872 PetscInt on_B,on_primal,on_D=n_D; 2873 if (pcbddc->coarse_phi_D) { 2874 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 2875 } 2876 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 2877 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 2878 PetscScalar *marray; 2879 2880 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 2881 ierr = PetscFree(marray);CHKERRQ(ierr); 2882 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2883 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2884 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2885 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2886 } 2887 } 2888 2889 if (!pcbddc->coarse_phi_B) { 2890 PetscScalar *marray; 2891 2892 n = n_B*pcbddc->local_primal_size; 2893 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2894 n += n_D*pcbddc->local_primal_size; 2895 } 2896 if (!pcbddc->symmetric_primal) { 2897 n *= 2; 2898 } 2899 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 2900 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 2901 n = n_B*pcbddc->local_primal_size; 2902 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2903 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 2904 n += n_D*pcbddc->local_primal_size; 2905 } 2906 if (!pcbddc->symmetric_primal) { 2907 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 2908 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2909 n = n_B*pcbddc->local_primal_size; 2910 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 2911 } 2912 } else { 2913 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 2914 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 2915 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2916 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 2917 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 2918 } 2919 } 2920 } 2921 2922 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 2923 p0_lidx_I = NULL; 2924 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 2925 const PetscInt *idxs; 2926 2927 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2928 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 2929 for (i=0;i<pcbddc->benign_n;i++) { 2930 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 2931 } 2932 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 2933 } 2934 2935 /* vertices */ 2936 if (n_vertices) { 2937 2938 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 2939 2940 if (n_R) { 2941 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 2942 PetscBLASInt B_N,B_one = 1; 2943 PetscScalar *x,*y; 2944 PetscBool isseqaij; 2945 2946 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 2947 if (need_benign_correction) { 2948 ISLocalToGlobalMapping RtoN; 2949 IS is_p0; 2950 PetscInt *idxs_p0,n; 2951 2952 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 2953 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 2954 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 2955 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 2956 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 2957 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 2958 ierr = MatGetSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 2959 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2960 } 2961 2962 if (lda_rhs == n_R) { 2963 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2964 } else { 2965 PetscScalar *av,*array; 2966 const PetscInt *xadj,*adjncy; 2967 PetscInt n; 2968 PetscBool flg_row; 2969 2970 array = work+lda_rhs*n_vertices; 2971 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 2972 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 2973 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2974 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 2975 for (i=0;i<n;i++) { 2976 PetscInt j; 2977 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 2978 } 2979 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2980 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 2981 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 2982 } 2983 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 2984 if (need_benign_correction) { 2985 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 2986 PetscScalar *marr; 2987 2988 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 2989 /* need \Phi^T A_RV = (I+L)A_RV, L given by 2990 2991 | 0 0 0 | (V) 2992 L = | 0 0 -1 | (P-p0) 2993 | 0 0 -1 | (p0) 2994 2995 */ 2996 for (i=0;i<reuse_solver->benign_n;i++) { 2997 const PetscScalar *vals; 2998 const PetscInt *idxs,*idxs_zero; 2999 PetscInt n,j,nz; 3000 3001 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3002 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3003 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3004 for (j=0;j<n;j++) { 3005 PetscScalar val = vals[j]; 3006 PetscInt k,col = idxs[j]; 3007 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3008 } 3009 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3010 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3011 } 3012 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3013 } 3014 if (F) { 3015 /* need to correct the rhs */ 3016 if (need_benign_correction) { 3017 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3018 PetscScalar *marr; 3019 3020 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3021 if (lda_rhs != n_R) { 3022 for (i=0;i<n_vertices;i++) { 3023 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3024 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3025 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3026 } 3027 } else { 3028 for (i=0;i<n_vertices;i++) { 3029 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3030 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3031 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3032 } 3033 } 3034 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3035 } 3036 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3037 /* need to correct the solution */ 3038 if (need_benign_correction) { 3039 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3040 PetscScalar *marr; 3041 3042 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3043 if (lda_rhs != n_R) { 3044 for (i=0;i<n_vertices;i++) { 3045 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3046 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3047 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3048 } 3049 } else { 3050 for (i=0;i<n_vertices;i++) { 3051 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3052 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3053 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3054 } 3055 } 3056 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3057 } 3058 } else { 3059 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3060 for (i=0;i<n_vertices;i++) { 3061 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3062 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3063 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3064 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3065 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3066 } 3067 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3068 } 3069 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3070 /* S_VV and S_CV */ 3071 if (n_constraints) { 3072 Mat B; 3073 3074 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3075 for (i=0;i<n_vertices;i++) { 3076 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3077 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3078 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3079 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3080 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3081 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3082 } 3083 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3084 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3085 ierr = MatDestroy(&B);CHKERRQ(ierr); 3086 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3087 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3088 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3089 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3090 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3091 ierr = MatDestroy(&B);CHKERRQ(ierr); 3092 } 3093 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3094 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3095 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3096 } 3097 if (lda_rhs != n_R) { 3098 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3099 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3100 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3101 } 3102 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3103 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3104 if (need_benign_correction) { 3105 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3106 PetscScalar *marr,*sums; 3107 3108 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3109 ierr = MatDenseGetArray(S_VVt,&marr); 3110 for (i=0;i<reuse_solver->benign_n;i++) { 3111 const PetscScalar *vals; 3112 const PetscInt *idxs,*idxs_zero; 3113 PetscInt n,j,nz; 3114 3115 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3116 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3117 for (j=0;j<n_vertices;j++) { 3118 PetscInt k; 3119 sums[j] = 0.; 3120 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3121 } 3122 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3123 for (j=0;j<n;j++) { 3124 PetscScalar val = vals[j]; 3125 PetscInt k; 3126 for (k=0;k<n_vertices;k++) { 3127 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3128 } 3129 } 3130 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3131 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3132 } 3133 ierr = PetscFree(sums);CHKERRQ(ierr); 3134 ierr = MatDenseRestoreArray(S_VVt,&marr); 3135 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3136 } 3137 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3138 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3139 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3140 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3141 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3142 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3143 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3144 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3145 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 3146 } else { 3147 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3148 } 3149 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 3150 3151 /* coarse basis functions */ 3152 for (i=0;i<n_vertices;i++) { 3153 PetscScalar *y; 3154 3155 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3156 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3157 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3158 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3159 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3160 y[n_B*i+idx_V_B[i]] = 1.0; 3161 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3162 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3163 3164 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3165 PetscInt j; 3166 3167 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3168 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3169 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3170 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3171 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3172 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3173 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3174 } 3175 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3176 } 3177 /* if n_R == 0 the object is not destroyed */ 3178 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3179 } 3180 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 3181 3182 if (n_constraints) { 3183 Mat B; 3184 3185 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3186 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3187 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3188 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 3189 if (n_vertices) { 3190 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 3191 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 3192 } else { 3193 Mat S_VCt; 3194 3195 if (lda_rhs != n_R) { 3196 ierr = MatDestroy(&B);CHKERRQ(ierr); 3197 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 3198 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 3199 } 3200 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 3201 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3202 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 3203 } 3204 } 3205 ierr = MatDestroy(&B);CHKERRQ(ierr); 3206 /* coarse basis functions */ 3207 for (i=0;i<n_constraints;i++) { 3208 PetscScalar *y; 3209 3210 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 3211 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3212 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 3213 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3214 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3215 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 3216 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3217 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3218 PetscInt j; 3219 3220 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3221 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 3222 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3223 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3224 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3225 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 3226 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 3227 } 3228 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3229 } 3230 } 3231 if (n_constraints) { 3232 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 3233 } 3234 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 3235 3236 /* coarse matrix entries relative to B_0 */ 3237 if (pcbddc->benign_n) { 3238 Mat B0_B,B0_BPHI; 3239 IS is_dummy; 3240 PetscScalar *data; 3241 PetscInt j; 3242 3243 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3244 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3245 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3246 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3247 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3248 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 3249 for (j=0;j<pcbddc->benign_n;j++) { 3250 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3251 for (i=0;i<pcbddc->local_primal_size;i++) { 3252 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 3253 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 3254 } 3255 } 3256 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 3257 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3258 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3259 } 3260 3261 /* compute other basis functions for non-symmetric problems */ 3262 if (!pcbddc->symmetric_primal) { 3263 Mat B_V=NULL,B_C=NULL; 3264 PetscScalar *marray; 3265 3266 if (n_constraints) { 3267 Mat S_CCT,C_CRT; 3268 3269 ierr = MatTranspose(C_CR,MAT_INPLACE_MATRIX,&C_CRT);CHKERRQ(ierr); 3270 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 3271 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 3272 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 3273 if (n_vertices) { 3274 Mat S_VCT; 3275 3276 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 3277 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 3278 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 3279 } 3280 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 3281 } else { 3282 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 3283 } 3284 if (n_vertices && n_R) { 3285 PetscScalar *av,*marray; 3286 const PetscInt *xadj,*adjncy; 3287 PetscInt n; 3288 PetscBool flg_row; 3289 3290 /* B_V = B_V - A_VR^T */ 3291 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3292 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3293 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 3294 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3295 for (i=0;i<n;i++) { 3296 PetscInt j; 3297 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 3298 } 3299 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3300 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3301 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3302 } 3303 3304 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 3305 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 3306 for (i=0;i<n_vertices;i++) { 3307 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 3308 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3309 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3310 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3311 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3312 } 3313 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 3314 if (B_C) { 3315 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 3316 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 3317 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 3318 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 3319 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3320 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3321 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3322 } 3323 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 3324 } 3325 /* coarse basis functions */ 3326 for (i=0;i<pcbddc->local_primal_size;i++) { 3327 PetscScalar *y; 3328 3329 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 3330 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3331 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 3332 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3333 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3334 if (i<n_vertices) { 3335 y[n_B*i+idx_V_B[i]] = 1.0; 3336 } 3337 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 3338 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3339 3340 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3341 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3342 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 3343 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3344 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3345 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 3346 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 3347 } 3348 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3349 } 3350 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 3351 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 3352 } 3353 /* free memory */ 3354 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 3355 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 3356 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 3357 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 3358 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 3359 ierr = PetscFree(work);CHKERRQ(ierr); 3360 if (n_vertices) { 3361 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 3362 } 3363 if (n_constraints) { 3364 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 3365 } 3366 /* Checking coarse_sub_mat and coarse basis functios */ 3367 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3368 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 3369 if (pcbddc->dbg_flag) { 3370 Mat coarse_sub_mat; 3371 Mat AUXMAT,TM1,TM2,TM3,TM4; 3372 Mat coarse_phi_D,coarse_phi_B; 3373 Mat coarse_psi_D,coarse_psi_B; 3374 Mat A_II,A_BB,A_IB,A_BI; 3375 Mat C_B,CPHI; 3376 IS is_dummy; 3377 Vec mones; 3378 MatType checkmattype=MATSEQAIJ; 3379 PetscReal real_value; 3380 3381 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3382 Mat A; 3383 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 3384 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3385 ierr = MatGetSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3386 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3387 ierr = MatGetSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3388 ierr = MatDestroy(&A);CHKERRQ(ierr); 3389 } else { 3390 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 3391 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 3392 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 3393 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 3394 } 3395 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 3396 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 3397 if (!pcbddc->symmetric_primal) { 3398 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 3399 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 3400 } 3401 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 3402 3403 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3404 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 3405 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3406 if (!pcbddc->symmetric_primal) { 3407 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3408 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3409 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3410 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3411 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3412 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3413 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3414 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3415 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3416 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3417 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3418 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3419 } else { 3420 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 3421 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 3422 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3423 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 3424 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3425 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 3426 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 3427 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 3428 } 3429 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3430 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3431 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3432 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 3433 if (pcbddc->benign_n) { 3434 Mat B0_B,B0_BPHI; 3435 PetscScalar *data,*data2; 3436 PetscInt j; 3437 3438 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3439 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 3440 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 3441 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 3442 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 3443 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 3444 for (j=0;j<pcbddc->benign_n;j++) { 3445 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 3446 for (i=0;i<pcbddc->local_primal_size;i++) { 3447 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 3448 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 3449 } 3450 } 3451 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 3452 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 3453 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 3454 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3455 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 3456 } 3457 #if 0 3458 { 3459 PetscViewer viewer; 3460 char filename[256]; 3461 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 3462 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3463 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3464 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 3465 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 3466 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 3467 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 3468 if (save_change) { 3469 Mat phi_B; 3470 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 3471 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 3472 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 3473 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 3474 } else { 3475 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 3476 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 3477 } 3478 if (pcbddc->coarse_phi_D) { 3479 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 3480 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 3481 } 3482 if (pcbddc->coarse_psi_B) { 3483 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 3484 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 3485 } 3486 if (pcbddc->coarse_psi_D) { 3487 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 3488 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 3489 } 3490 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3491 } 3492 #endif 3493 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 3494 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3495 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3496 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3497 3498 /* check constraints */ 3499 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 3500 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3501 if (!pcbddc->benign_n) { /* TODO: add benign case */ 3502 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3503 } else { 3504 PetscScalar *data; 3505 Mat tmat; 3506 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3507 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 3508 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 3509 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3510 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3511 } 3512 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 3513 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3514 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3515 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3516 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3517 if (!pcbddc->symmetric_primal) { 3518 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 3519 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 3520 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 3521 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 3522 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 3523 } 3524 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3525 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 3526 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3527 ierr = VecDestroy(&mones);CHKERRQ(ierr); 3528 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3529 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 3530 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 3531 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 3532 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 3533 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 3534 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 3535 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 3536 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 3537 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 3538 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 3539 if (!pcbddc->symmetric_primal) { 3540 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 3541 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 3542 } 3543 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 3544 } 3545 /* get back data */ 3546 *coarse_submat_vals_n = coarse_submat_vals; 3547 PetscFunctionReturn(0); 3548 } 3549 3550 #undef __FUNCT__ 3551 #define __FUNCT__ "MatGetSubMatrixUnsorted" 3552 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 3553 { 3554 Mat *work_mat; 3555 IS isrow_s,iscol_s; 3556 PetscBool rsorted,csorted; 3557 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 3558 PetscErrorCode ierr; 3559 3560 PetscFunctionBegin; 3561 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 3562 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 3563 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 3564 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3565 3566 if (!rsorted) { 3567 const PetscInt *idxs; 3568 PetscInt *idxs_sorted,i; 3569 3570 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 3571 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 3572 for (i=0;i<rsize;i++) { 3573 idxs_perm_r[i] = i; 3574 } 3575 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 3576 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 3577 for (i=0;i<rsize;i++) { 3578 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 3579 } 3580 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 3581 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 3582 } else { 3583 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3584 isrow_s = isrow; 3585 } 3586 3587 if (!csorted) { 3588 if (isrow == iscol) { 3589 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 3590 iscol_s = isrow_s; 3591 } else { 3592 const PetscInt *idxs; 3593 PetscInt *idxs_sorted,i; 3594 3595 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 3596 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 3597 for (i=0;i<csize;i++) { 3598 idxs_perm_c[i] = i; 3599 } 3600 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 3601 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 3602 for (i=0;i<csize;i++) { 3603 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 3604 } 3605 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 3606 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 3607 } 3608 } else { 3609 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3610 iscol_s = iscol; 3611 } 3612 3613 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3614 3615 if (!rsorted || !csorted) { 3616 Mat new_mat; 3617 IS is_perm_r,is_perm_c; 3618 3619 if (!rsorted) { 3620 PetscInt *idxs_r,i; 3621 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 3622 for (i=0;i<rsize;i++) { 3623 idxs_r[idxs_perm_r[i]] = i; 3624 } 3625 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 3626 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 3627 } else { 3628 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 3629 } 3630 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 3631 3632 if (!csorted) { 3633 if (isrow_s == iscol_s) { 3634 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 3635 is_perm_c = is_perm_r; 3636 } else { 3637 PetscInt *idxs_c,i; 3638 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 3639 for (i=0;i<csize;i++) { 3640 idxs_c[idxs_perm_c[i]] = i; 3641 } 3642 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 3643 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 3644 } 3645 } else { 3646 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 3647 } 3648 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 3649 3650 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 3651 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 3652 work_mat[0] = new_mat; 3653 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 3654 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 3655 } 3656 3657 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 3658 *B = work_mat[0]; 3659 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 3660 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 3661 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 3662 PetscFunctionReturn(0); 3663 } 3664 3665 #undef __FUNCT__ 3666 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 3667 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 3668 { 3669 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3670 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3671 Mat new_mat; 3672 IS is_local,is_global; 3673 PetscInt local_size; 3674 PetscBool isseqaij; 3675 PetscErrorCode ierr; 3676 3677 PetscFunctionBegin; 3678 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3679 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 3680 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 3681 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 3682 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 3683 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 3684 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 3685 3686 /* check */ 3687 if (pcbddc->dbg_flag) { 3688 Vec x,x_change; 3689 PetscReal error; 3690 3691 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 3692 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3693 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 3694 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3695 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3696 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 3697 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3698 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3699 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3700 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3701 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3702 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 3703 ierr = VecDestroy(&x);CHKERRQ(ierr); 3704 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3705 } 3706 3707 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 3708 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3709 if (isseqaij) { 3710 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3711 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 3712 } else { 3713 Mat work_mat; 3714 3715 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3716 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 3717 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 3718 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 3719 } 3720 if (matis->A->symmetric_set) { 3721 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 3722 #if !defined(PETSC_USE_COMPLEX) 3723 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 3724 #endif 3725 } 3726 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 3727 PetscFunctionReturn(0); 3728 } 3729 3730 #undef __FUNCT__ 3731 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 3732 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 3733 { 3734 PC_IS* pcis = (PC_IS*)(pc->data); 3735 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3736 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3737 PetscInt *idx_R_local=NULL; 3738 PetscInt n_vertices,i,j,n_R,n_D,n_B; 3739 PetscInt vbs,bs; 3740 PetscBT bitmask=NULL; 3741 PetscErrorCode ierr; 3742 3743 PetscFunctionBegin; 3744 /* 3745 No need to setup local scatters if 3746 - primal space is unchanged 3747 AND 3748 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 3749 AND 3750 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 3751 */ 3752 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 3753 PetscFunctionReturn(0); 3754 } 3755 /* destroy old objects */ 3756 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3757 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3758 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3759 /* Set Non-overlapping dimensions */ 3760 n_B = pcis->n_B; 3761 n_D = pcis->n - n_B; 3762 n_vertices = pcbddc->n_vertices; 3763 3764 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 3765 3766 /* create auxiliary bitmask and allocate workspace */ 3767 if (!sub_schurs || !sub_schurs->reuse_solver) { 3768 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 3769 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 3770 for (i=0;i<n_vertices;i++) { 3771 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 3772 } 3773 3774 for (i=0, n_R=0; i<pcis->n; i++) { 3775 if (!PetscBTLookup(bitmask,i)) { 3776 idx_R_local[n_R++] = i; 3777 } 3778 } 3779 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 3780 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3781 3782 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3783 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 3784 } 3785 3786 /* Block code */ 3787 vbs = 1; 3788 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 3789 if (bs>1 && !(n_vertices%bs)) { 3790 PetscBool is_blocked = PETSC_TRUE; 3791 PetscInt *vary; 3792 if (!sub_schurs || !sub_schurs->reuse_solver) { 3793 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 3794 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 3795 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 3796 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 3797 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 3798 for (i=0; i<pcis->n/bs; i++) { 3799 if (vary[i]!=0 && vary[i]!=bs) { 3800 is_blocked = PETSC_FALSE; 3801 break; 3802 } 3803 } 3804 ierr = PetscFree(vary);CHKERRQ(ierr); 3805 } else { 3806 /* Verify directly the R set */ 3807 for (i=0; i<n_R/bs; i++) { 3808 PetscInt j,node=idx_R_local[bs*i]; 3809 for (j=1; j<bs; j++) { 3810 if (node != idx_R_local[bs*i+j]-j) { 3811 is_blocked = PETSC_FALSE; 3812 break; 3813 } 3814 } 3815 } 3816 } 3817 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 3818 vbs = bs; 3819 for (i=0;i<n_R/vbs;i++) { 3820 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 3821 } 3822 } 3823 } 3824 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 3825 if (sub_schurs && sub_schurs->reuse_solver) { 3826 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3827 3828 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3829 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 3830 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 3831 reuse_solver->is_R = pcbddc->is_R_local; 3832 } else { 3833 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 3834 } 3835 3836 /* print some info if requested */ 3837 if (pcbddc->dbg_flag) { 3838 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3839 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3840 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3841 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 3842 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 3843 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 3844 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3845 } 3846 3847 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 3848 if (!sub_schurs || !sub_schurs->reuse_solver) { 3849 IS is_aux1,is_aux2; 3850 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 3851 3852 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3853 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 3854 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 3855 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3856 for (i=0; i<n_D; i++) { 3857 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 3858 } 3859 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3860 for (i=0, j=0; i<n_R; i++) { 3861 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 3862 aux_array1[j++] = i; 3863 } 3864 } 3865 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3866 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3867 for (i=0, j=0; i<n_B; i++) { 3868 if (!PetscBTLookup(bitmask,is_indices[i])) { 3869 aux_array2[j++] = i; 3870 } 3871 } 3872 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3873 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 3874 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 3875 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3876 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 3877 3878 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3879 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 3880 for (i=0, j=0; i<n_R; i++) { 3881 if (PetscBTLookup(bitmask,idx_R_local[i])) { 3882 aux_array1[j++] = i; 3883 } 3884 } 3885 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 3886 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3887 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 3888 } 3889 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 3890 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 3891 } else { 3892 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3893 IS tis; 3894 PetscInt schur_size; 3895 3896 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 3897 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 3898 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 3899 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3900 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3901 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 3902 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 3903 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3904 } 3905 } 3906 PetscFunctionReturn(0); 3907 } 3908 3909 3910 #undef __FUNCT__ 3911 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 3912 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 3913 { 3914 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3915 PC_IS *pcis = (PC_IS*)pc->data; 3916 PC pc_temp; 3917 Mat A_RR; 3918 MatReuse reuse; 3919 PetscScalar m_one = -1.0; 3920 PetscReal value; 3921 PetscInt n_D,n_R; 3922 PetscBool check_corr[2],issbaij; 3923 PetscErrorCode ierr; 3924 /* prefixes stuff */ 3925 char dir_prefix[256],neu_prefix[256],str_level[16]; 3926 size_t len; 3927 3928 PetscFunctionBegin; 3929 3930 /* compute prefixes */ 3931 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 3932 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 3933 if (!pcbddc->current_level) { 3934 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3935 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3936 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3937 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3938 } else { 3939 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3940 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3941 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3942 len -= 15; /* remove "pc_bddc_coarse_" */ 3943 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3944 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3945 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3946 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3947 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 3948 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 3949 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 3950 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 3951 } 3952 3953 /* DIRICHLET PROBLEM */ 3954 if (dirichlet) { 3955 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3956 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 3957 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 3958 if (pcbddc->dbg_flag) { 3959 Mat A_IIn; 3960 3961 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 3962 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 3963 pcis->A_II = A_IIn; 3964 } 3965 } 3966 if (pcbddc->local_mat->symmetric_set) { 3967 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 3968 } 3969 /* Matrix for Dirichlet problem is pcis->A_II */ 3970 n_D = pcis->n - pcis->n_B; 3971 if (!pcbddc->ksp_D) { /* create object if not yet build */ 3972 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 3973 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 3974 /* default */ 3975 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 3976 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 3977 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 3978 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3979 if (issbaij) { 3980 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 3981 } else { 3982 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 3983 } 3984 /* Allow user's customization */ 3985 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 3986 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3987 } 3988 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 3989 if (sub_schurs && sub_schurs->reuse_solver) { 3990 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3991 3992 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 3993 } 3994 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 3995 if (!n_D) { 3996 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 3997 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 3998 } 3999 /* Set Up KSP for Dirichlet problem of BDDC */ 4000 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4001 /* set ksp_D into pcis data */ 4002 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4003 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4004 pcis->ksp_D = pcbddc->ksp_D; 4005 } 4006 4007 /* NEUMANN PROBLEM */ 4008 A_RR = 0; 4009 if (neumann) { 4010 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4011 PetscInt ibs,mbs; 4012 PetscBool issbaij; 4013 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4014 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4015 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4016 if (pcbddc->ksp_R) { /* already created ksp */ 4017 PetscInt nn_R; 4018 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4019 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4020 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4021 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4022 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4023 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4024 reuse = MAT_INITIAL_MATRIX; 4025 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4026 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4027 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4028 reuse = MAT_INITIAL_MATRIX; 4029 } else { /* safe to reuse the matrix */ 4030 reuse = MAT_REUSE_MATRIX; 4031 } 4032 } 4033 /* last check */ 4034 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4035 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4036 reuse = MAT_INITIAL_MATRIX; 4037 } 4038 } else { /* first time, so we need to create the matrix */ 4039 reuse = MAT_INITIAL_MATRIX; 4040 } 4041 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4042 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4043 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4044 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4045 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4046 if (matis->A == pcbddc->local_mat) { 4047 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4048 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4049 } else { 4050 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4051 } 4052 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4053 if (matis->A == pcbddc->local_mat) { 4054 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4055 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4056 } else { 4057 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4058 } 4059 } 4060 /* extract A_RR */ 4061 if (sub_schurs && sub_schurs->reuse_solver) { 4062 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4063 4064 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4065 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4066 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4067 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4068 } else { 4069 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4070 } 4071 } else { 4072 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4073 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4074 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4075 } 4076 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4077 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4078 } 4079 if (pcbddc->local_mat->symmetric_set) { 4080 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4081 } 4082 if (!pcbddc->ksp_R) { /* create object if not present */ 4083 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4084 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4085 /* default */ 4086 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4087 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4088 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4089 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4090 if (issbaij) { 4091 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4092 } else { 4093 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4094 } 4095 /* Allow user's customization */ 4096 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4097 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4098 } 4099 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4100 if (!n_R) { 4101 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4102 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4103 } 4104 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4105 /* Reuse solver if it is present */ 4106 if (sub_schurs && sub_schurs->reuse_solver) { 4107 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4108 4109 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 4110 } 4111 /* Set Up KSP for Neumann problem of BDDC */ 4112 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 4113 } 4114 4115 if (pcbddc->dbg_flag) { 4116 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4117 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4118 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4119 } 4120 4121 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 4122 check_corr[0] = check_corr[1] = PETSC_FALSE; 4123 if (pcbddc->NullSpace_corr[0]) { 4124 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 4125 } 4126 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 4127 check_corr[0] = PETSC_TRUE; 4128 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 4129 } 4130 if (neumann && pcbddc->NullSpace_corr[2]) { 4131 check_corr[1] = PETSC_TRUE; 4132 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 4133 } 4134 4135 /* check Dirichlet and Neumann solvers */ 4136 if (pcbddc->dbg_flag) { 4137 if (dirichlet) { /* Dirichlet */ 4138 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 4139 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 4140 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 4141 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 4142 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 4143 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 4144 if (check_corr[0]) { 4145 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 4146 } 4147 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4148 } 4149 if (neumann) { /* Neumann */ 4150 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 4151 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4152 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 4153 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 4154 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 4155 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 4156 if (check_corr[1]) { 4157 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 4158 } 4159 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4160 } 4161 } 4162 /* free Neumann problem's matrix */ 4163 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4164 PetscFunctionReturn(0); 4165 } 4166 4167 #undef __FUNCT__ 4168 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 4169 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 4170 { 4171 PetscErrorCode ierr; 4172 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4173 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4174 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 4175 4176 PetscFunctionBegin; 4177 if (!reuse_solver) { 4178 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 4179 } 4180 if (!pcbddc->switch_static) { 4181 if (applytranspose && pcbddc->local_auxmat1) { 4182 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4183 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4184 } 4185 if (!reuse_solver) { 4186 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4187 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4188 } else { 4189 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4190 4191 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4192 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4193 } 4194 } else { 4195 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4196 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4197 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4198 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4199 if (applytranspose && pcbddc->local_auxmat1) { 4200 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 4201 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4202 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4203 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4204 } 4205 } 4206 if (!reuse_solver || pcbddc->switch_static) { 4207 if (applytranspose) { 4208 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4209 } else { 4210 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4211 } 4212 } else { 4213 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4214 4215 if (applytranspose) { 4216 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4217 } else { 4218 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 4219 } 4220 } 4221 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 4222 if (!pcbddc->switch_static) { 4223 if (!reuse_solver) { 4224 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4225 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4226 } else { 4227 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4228 4229 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4230 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4231 } 4232 if (!applytranspose && pcbddc->local_auxmat1) { 4233 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4234 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 4235 } 4236 } else { 4237 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4238 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4239 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4240 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4241 if (!applytranspose && pcbddc->local_auxmat1) { 4242 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 4243 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 4244 } 4245 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4246 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4247 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4248 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4249 } 4250 PetscFunctionReturn(0); 4251 } 4252 4253 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 4254 #undef __FUNCT__ 4255 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 4256 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 4257 { 4258 PetscErrorCode ierr; 4259 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4260 PC_IS* pcis = (PC_IS*) (pc->data); 4261 const PetscScalar zero = 0.0; 4262 4263 PetscFunctionBegin; 4264 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 4265 if (!pcbddc->benign_apply_coarse_only) { 4266 if (applytranspose) { 4267 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4268 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4269 } else { 4270 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 4271 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 4272 } 4273 } else { 4274 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 4275 } 4276 4277 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 4278 if (pcbddc->benign_n) { 4279 PetscScalar *array; 4280 PetscInt j; 4281 4282 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4283 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 4284 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4285 } 4286 4287 /* start communications from local primal nodes to rhs of coarse solver */ 4288 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 4289 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4290 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4291 4292 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 4293 if (pcbddc->coarse_ksp) { 4294 Mat coarse_mat; 4295 Vec rhs,sol; 4296 MatNullSpace nullsp; 4297 PetscBool isbddc = PETSC_FALSE; 4298 4299 if (pcbddc->benign_have_null) { 4300 PC coarse_pc; 4301 4302 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4303 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4304 /* we need to propagate to coarser levels the need for a possible benign correction */ 4305 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 4306 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4307 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 4308 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 4309 } 4310 } 4311 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 4312 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 4313 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4314 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 4315 if (nullsp) { 4316 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 4317 } 4318 if (applytranspose) { 4319 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 4320 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4321 } else { 4322 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 4323 PC coarse_pc; 4324 4325 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4326 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4327 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 4328 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 4329 } else { 4330 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 4331 } 4332 } 4333 /* we don't need the benign correction at coarser levels anymore */ 4334 if (pcbddc->benign_have_null && isbddc) { 4335 PC coarse_pc; 4336 PC_BDDC* coarsepcbddc; 4337 4338 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 4339 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 4340 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 4341 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 4342 } 4343 if (nullsp) { 4344 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 4345 } 4346 } 4347 4348 /* Local solution on R nodes */ 4349 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 4350 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 4351 } 4352 /* communications from coarse sol to local primal nodes */ 4353 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4354 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4355 4356 /* Sum contributions from the two levels */ 4357 if (!pcbddc->benign_apply_coarse_only) { 4358 if (applytranspose) { 4359 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4360 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4361 } else { 4362 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 4363 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 4364 } 4365 /* store p0 */ 4366 if (pcbddc->benign_n) { 4367 PetscScalar *array; 4368 PetscInt j; 4369 4370 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4371 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 4372 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 4373 } 4374 } else { /* expand the coarse solution */ 4375 if (applytranspose) { 4376 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4377 } else { 4378 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 4379 } 4380 } 4381 PetscFunctionReturn(0); 4382 } 4383 4384 #undef __FUNCT__ 4385 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 4386 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 4387 { 4388 PetscErrorCode ierr; 4389 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4390 PetscScalar *array; 4391 Vec from,to; 4392 4393 PetscFunctionBegin; 4394 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4395 from = pcbddc->coarse_vec; 4396 to = pcbddc->vec1_P; 4397 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4398 Vec tvec; 4399 4400 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4401 ierr = VecResetArray(tvec);CHKERRQ(ierr); 4402 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4403 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 4404 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 4405 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 4406 } 4407 } else { /* from local to global -> put data in coarse right hand side */ 4408 from = pcbddc->vec1_P; 4409 to = pcbddc->coarse_vec; 4410 } 4411 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4412 PetscFunctionReturn(0); 4413 } 4414 4415 #undef __FUNCT__ 4416 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 4417 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 4418 { 4419 PetscErrorCode ierr; 4420 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 4421 PetscScalar *array; 4422 Vec from,to; 4423 4424 PetscFunctionBegin; 4425 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 4426 from = pcbddc->coarse_vec; 4427 to = pcbddc->vec1_P; 4428 } else { /* from local to global -> put data in coarse right hand side */ 4429 from = pcbddc->vec1_P; 4430 to = pcbddc->coarse_vec; 4431 } 4432 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 4433 if (smode == SCATTER_FORWARD) { 4434 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 4435 Vec tvec; 4436 4437 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 4438 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 4439 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 4440 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 4441 } 4442 } else { 4443 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 4444 ierr = VecResetArray(from);CHKERRQ(ierr); 4445 } 4446 } 4447 PetscFunctionReturn(0); 4448 } 4449 4450 /* uncomment for testing purposes */ 4451 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 4452 #undef __FUNCT__ 4453 #define __FUNCT__ "PCBDDCConstraintsSetUp" 4454 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 4455 { 4456 PetscErrorCode ierr; 4457 PC_IS* pcis = (PC_IS*)(pc->data); 4458 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4459 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4460 /* one and zero */ 4461 PetscScalar one=1.0,zero=0.0; 4462 /* space to store constraints and their local indices */ 4463 PetscScalar *constraints_data; 4464 PetscInt *constraints_idxs,*constraints_idxs_B; 4465 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 4466 PetscInt *constraints_n; 4467 /* iterators */ 4468 PetscInt i,j,k,total_counts,total_counts_cc,cum; 4469 /* BLAS integers */ 4470 PetscBLASInt lwork,lierr; 4471 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 4472 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 4473 /* reuse */ 4474 PetscInt olocal_primal_size,olocal_primal_size_cc; 4475 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 4476 /* change of basis */ 4477 PetscBool qr_needed; 4478 PetscBT change_basis,qr_needed_idx; 4479 /* auxiliary stuff */ 4480 PetscInt *nnz,*is_indices; 4481 PetscInt ncc; 4482 /* some quantities */ 4483 PetscInt n_vertices,total_primal_vertices,valid_constraints; 4484 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 4485 4486 PetscFunctionBegin; 4487 /* Destroy Mat objects computed previously */ 4488 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 4489 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4490 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 4491 /* save info on constraints from previous setup (if any) */ 4492 olocal_primal_size = pcbddc->local_primal_size; 4493 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 4494 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 4495 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4496 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 4497 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4498 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4499 4500 if (!pcbddc->adaptive_selection) { 4501 IS ISForVertices,*ISForFaces,*ISForEdges; 4502 MatNullSpace nearnullsp; 4503 const Vec *nearnullvecs; 4504 Vec *localnearnullsp; 4505 PetscScalar *array; 4506 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 4507 PetscBool nnsp_has_cnst; 4508 /* LAPACK working arrays for SVD or POD */ 4509 PetscBool skip_lapack,boolforchange; 4510 PetscScalar *work; 4511 PetscReal *singular_vals; 4512 #if defined(PETSC_USE_COMPLEX) 4513 PetscReal *rwork; 4514 #endif 4515 #if defined(PETSC_MISSING_LAPACK_GESVD) 4516 PetscScalar *temp_basis,*correlation_mat; 4517 #else 4518 PetscBLASInt dummy_int=1; 4519 PetscScalar dummy_scalar=1.; 4520 #endif 4521 4522 /* Get index sets for faces, edges and vertices from graph */ 4523 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 4524 /* print some info */ 4525 if (pcbddc->dbg_flag && !pcbddc->sub_schurs) { 4526 PetscInt nv; 4527 4528 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4529 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 4530 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4531 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 4532 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 4533 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 4534 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 4535 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4536 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4537 } 4538 4539 /* free unneeded index sets */ 4540 if (!pcbddc->use_vertices) { 4541 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4542 } 4543 if (!pcbddc->use_edges) { 4544 for (i=0;i<n_ISForEdges;i++) { 4545 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4546 } 4547 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4548 n_ISForEdges = 0; 4549 } 4550 if (!pcbddc->use_faces) { 4551 for (i=0;i<n_ISForFaces;i++) { 4552 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4553 } 4554 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4555 n_ISForFaces = 0; 4556 } 4557 4558 /* check if near null space is attached to global mat */ 4559 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 4560 if (nearnullsp) { 4561 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 4562 /* remove any stored info */ 4563 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 4564 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4565 /* store information for BDDC solver reuse */ 4566 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 4567 pcbddc->onearnullspace = nearnullsp; 4568 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 4569 for (i=0;i<nnsp_size;i++) { 4570 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 4571 } 4572 } else { /* if near null space is not provided BDDC uses constants by default */ 4573 nnsp_size = 0; 4574 nnsp_has_cnst = PETSC_TRUE; 4575 } 4576 /* get max number of constraints on a single cc */ 4577 max_constraints = nnsp_size; 4578 if (nnsp_has_cnst) max_constraints++; 4579 4580 /* 4581 Evaluate maximum storage size needed by the procedure 4582 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 4583 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 4584 There can be multiple constraints per connected component 4585 */ 4586 n_vertices = 0; 4587 if (ISForVertices) { 4588 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 4589 } 4590 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 4591 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 4592 4593 total_counts = n_ISForFaces+n_ISForEdges; 4594 total_counts *= max_constraints; 4595 total_counts += n_vertices; 4596 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 4597 4598 total_counts = 0; 4599 max_size_of_constraint = 0; 4600 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 4601 IS used_is; 4602 if (i<n_ISForEdges) { 4603 used_is = ISForEdges[i]; 4604 } else { 4605 used_is = ISForFaces[i-n_ISForEdges]; 4606 } 4607 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 4608 total_counts += j; 4609 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 4610 } 4611 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 4612 4613 /* get local part of global near null space vectors */ 4614 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 4615 for (k=0;k<nnsp_size;k++) { 4616 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 4617 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4618 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4619 } 4620 4621 /* whether or not to skip lapack calls */ 4622 skip_lapack = PETSC_TRUE; 4623 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 4624 4625 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 4626 if (!skip_lapack) { 4627 PetscScalar temp_work; 4628 4629 #if defined(PETSC_MISSING_LAPACK_GESVD) 4630 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 4631 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 4632 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 4633 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 4634 #if defined(PETSC_USE_COMPLEX) 4635 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 4636 #endif 4637 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4638 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 4639 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 4640 lwork = -1; 4641 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4642 #if !defined(PETSC_USE_COMPLEX) 4643 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 4644 #else 4645 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 4646 #endif 4647 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4648 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 4649 #else /* on missing GESVD */ 4650 /* SVD */ 4651 PetscInt max_n,min_n; 4652 max_n = max_size_of_constraint; 4653 min_n = max_constraints; 4654 if (max_size_of_constraint < max_constraints) { 4655 min_n = max_size_of_constraint; 4656 max_n = max_constraints; 4657 } 4658 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 4659 #if defined(PETSC_USE_COMPLEX) 4660 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 4661 #endif 4662 /* now we evaluate the optimal workspace using query with lwork=-1 */ 4663 lwork = -1; 4664 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 4665 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 4666 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 4667 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4668 #if !defined(PETSC_USE_COMPLEX) 4669 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 4670 #else 4671 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 4672 #endif 4673 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4674 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 4675 #endif /* on missing GESVD */ 4676 /* Allocate optimal workspace */ 4677 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 4678 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 4679 } 4680 /* Now we can loop on constraining sets */ 4681 total_counts = 0; 4682 constraints_idxs_ptr[0] = 0; 4683 constraints_data_ptr[0] = 0; 4684 /* vertices */ 4685 if (n_vertices) { 4686 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4687 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4688 for (i=0;i<n_vertices;i++) { 4689 constraints_n[total_counts] = 1; 4690 constraints_data[total_counts] = 1.0; 4691 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 4692 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 4693 total_counts++; 4694 } 4695 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4696 n_vertices = total_counts; 4697 } 4698 4699 /* edges and faces */ 4700 total_counts_cc = total_counts; 4701 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 4702 IS used_is; 4703 PetscBool idxs_copied = PETSC_FALSE; 4704 4705 if (ncc<n_ISForEdges) { 4706 used_is = ISForEdges[ncc]; 4707 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 4708 } else { 4709 used_is = ISForFaces[ncc-n_ISForEdges]; 4710 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 4711 } 4712 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 4713 4714 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 4715 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4716 /* change of basis should not be performed on local periodic nodes */ 4717 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 4718 if (nnsp_has_cnst) { 4719 PetscScalar quad_value; 4720 4721 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 4722 idxs_copied = PETSC_TRUE; 4723 4724 if (!pcbddc->use_nnsp_true) { 4725 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 4726 } else { 4727 quad_value = 1.0; 4728 } 4729 for (j=0;j<size_of_constraint;j++) { 4730 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 4731 } 4732 temp_constraints++; 4733 total_counts++; 4734 } 4735 for (k=0;k<nnsp_size;k++) { 4736 PetscReal real_value; 4737 PetscScalar *ptr_to_data; 4738 4739 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 4740 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 4741 for (j=0;j<size_of_constraint;j++) { 4742 ptr_to_data[j] = array[is_indices[j]]; 4743 } 4744 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 4745 /* check if array is null on the connected component */ 4746 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4747 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 4748 if (real_value > 0.0) { /* keep indices and values */ 4749 temp_constraints++; 4750 total_counts++; 4751 if (!idxs_copied) { 4752 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 4753 idxs_copied = PETSC_TRUE; 4754 } 4755 } 4756 } 4757 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4758 valid_constraints = temp_constraints; 4759 if (!pcbddc->use_nnsp_true && temp_constraints) { 4760 if (temp_constraints == 1) { /* just normalize the constraint */ 4761 PetscScalar norm,*ptr_to_data; 4762 4763 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 4764 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4765 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 4766 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 4767 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 4768 } else { /* perform SVD */ 4769 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 4770 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 4771 4772 #if defined(PETSC_MISSING_LAPACK_GESVD) 4773 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 4774 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 4775 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 4776 the constraints basis will differ (by a complex factor with absolute value equal to 1) 4777 from that computed using LAPACKgesvd 4778 -> This is due to a different computation of eigenvectors in LAPACKheev 4779 -> The quality of the POD-computed basis will be the same */ 4780 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 4781 /* Store upper triangular part of correlation matrix */ 4782 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 4783 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4784 for (j=0;j<temp_constraints;j++) { 4785 for (k=0;k<j+1;k++) { 4786 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 4787 } 4788 } 4789 /* compute eigenvalues and eigenvectors of correlation matrix */ 4790 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 4791 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 4792 #if !defined(PETSC_USE_COMPLEX) 4793 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 4794 #else 4795 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 4796 #endif 4797 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4798 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 4799 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 4800 j = 0; 4801 while (j < temp_constraints && singular_vals[j] < tol) j++; 4802 total_counts = total_counts-j; 4803 valid_constraints = temp_constraints-j; 4804 /* scale and copy POD basis into used quadrature memory */ 4805 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4806 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 4807 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 4808 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4809 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 4810 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 4811 if (j<temp_constraints) { 4812 PetscInt ii; 4813 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 4814 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4815 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 4816 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4817 for (k=0;k<temp_constraints-j;k++) { 4818 for (ii=0;ii<size_of_constraint;ii++) { 4819 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 4820 } 4821 } 4822 } 4823 #else /* on missing GESVD */ 4824 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 4825 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 4826 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 4827 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 4828 #if !defined(PETSC_USE_COMPLEX) 4829 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 4830 #else 4831 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 4832 #endif 4833 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 4834 ierr = PetscFPTrapPop();CHKERRQ(ierr); 4835 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 4836 k = temp_constraints; 4837 if (k > size_of_constraint) k = size_of_constraint; 4838 j = 0; 4839 while (j < k && singular_vals[k-j-1] < tol) j++; 4840 valid_constraints = k-j; 4841 total_counts = total_counts-temp_constraints+valid_constraints; 4842 #endif /* on missing GESVD */ 4843 } 4844 } 4845 /* update pointers information */ 4846 if (valid_constraints) { 4847 constraints_n[total_counts_cc] = valid_constraints; 4848 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 4849 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 4850 /* set change_of_basis flag */ 4851 if (boolforchange) { 4852 PetscBTSet(change_basis,total_counts_cc); 4853 } 4854 total_counts_cc++; 4855 } 4856 } 4857 /* free workspace */ 4858 if (!skip_lapack) { 4859 ierr = PetscFree(work);CHKERRQ(ierr); 4860 #if defined(PETSC_USE_COMPLEX) 4861 ierr = PetscFree(rwork);CHKERRQ(ierr); 4862 #endif 4863 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 4864 #if defined(PETSC_MISSING_LAPACK_GESVD) 4865 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 4866 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 4867 #endif 4868 } 4869 for (k=0;k<nnsp_size;k++) { 4870 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 4871 } 4872 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 4873 /* free index sets of faces, edges and vertices */ 4874 for (i=0;i<n_ISForFaces;i++) { 4875 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 4876 } 4877 if (n_ISForFaces) { 4878 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 4879 } 4880 for (i=0;i<n_ISForEdges;i++) { 4881 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 4882 } 4883 if (n_ISForEdges) { 4884 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 4885 } 4886 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 4887 } else { 4888 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4889 4890 total_counts = 0; 4891 n_vertices = 0; 4892 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 4893 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 4894 } 4895 max_constraints = 0; 4896 total_counts_cc = 0; 4897 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4898 total_counts += pcbddc->adaptive_constraints_n[i]; 4899 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 4900 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 4901 } 4902 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 4903 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 4904 constraints_idxs = pcbddc->adaptive_constraints_idxs; 4905 constraints_data = pcbddc->adaptive_constraints_data; 4906 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 4907 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 4908 total_counts_cc = 0; 4909 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 4910 if (pcbddc->adaptive_constraints_n[i]) { 4911 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 4912 } 4913 } 4914 #if 0 4915 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 4916 for (i=0;i<total_counts_cc;i++) { 4917 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 4918 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 4919 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 4920 printf(" %d",constraints_idxs[j]); 4921 } 4922 printf("\n"); 4923 printf("number of cc: %d\n",constraints_n[i]); 4924 } 4925 for (i=0;i<n_vertices;i++) { 4926 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 4927 } 4928 for (i=0;i<sub_schurs->n_subs;i++) { 4929 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 4930 } 4931 #endif 4932 4933 max_size_of_constraint = 0; 4934 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 4935 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 4936 /* Change of basis */ 4937 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 4938 if (pcbddc->use_change_of_basis) { 4939 for (i=0;i<sub_schurs->n_subs;i++) { 4940 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 4941 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 4942 } 4943 } 4944 } 4945 } 4946 pcbddc->local_primal_size = total_counts; 4947 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4948 4949 /* map constraints_idxs in boundary numbering */ 4950 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 4951 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 4952 4953 /* Create constraint matrix */ 4954 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 4955 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 4956 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 4957 4958 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 4959 /* determine if a QR strategy is needed for change of basis */ 4960 qr_needed = PETSC_FALSE; 4961 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 4962 total_primal_vertices=0; 4963 pcbddc->local_primal_size_cc = 0; 4964 for (i=0;i<total_counts_cc;i++) { 4965 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 4966 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 4967 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 4968 pcbddc->local_primal_size_cc += 1; 4969 } else if (PetscBTLookup(change_basis,i)) { 4970 for (k=0;k<constraints_n[i];k++) { 4971 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 4972 } 4973 pcbddc->local_primal_size_cc += constraints_n[i]; 4974 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 4975 PetscBTSet(qr_needed_idx,i); 4976 qr_needed = PETSC_TRUE; 4977 } 4978 } else { 4979 pcbddc->local_primal_size_cc += 1; 4980 } 4981 } 4982 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 4983 pcbddc->n_vertices = total_primal_vertices; 4984 /* permute indices in order to have a sorted set of vertices */ 4985 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 4986 4987 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 4988 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 4989 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 4990 4991 /* nonzero structure of constraint matrix */ 4992 /* and get reference dof for local constraints */ 4993 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 4994 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 4995 4996 j = total_primal_vertices; 4997 total_counts = total_primal_vertices; 4998 cum = total_primal_vertices; 4999 for (i=n_vertices;i<total_counts_cc;i++) { 5000 if (!PetscBTLookup(change_basis,i)) { 5001 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5002 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5003 cum++; 5004 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5005 for (k=0;k<constraints_n[i];k++) { 5006 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5007 nnz[j+k] = size_of_constraint; 5008 } 5009 j += constraints_n[i]; 5010 } 5011 } 5012 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5013 ierr = PetscFree(nnz);CHKERRQ(ierr); 5014 5015 /* set values in constraint matrix */ 5016 for (i=0;i<total_primal_vertices;i++) { 5017 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5018 } 5019 total_counts = total_primal_vertices; 5020 for (i=n_vertices;i<total_counts_cc;i++) { 5021 if (!PetscBTLookup(change_basis,i)) { 5022 PetscInt *cols; 5023 5024 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5025 cols = constraints_idxs+constraints_idxs_ptr[i]; 5026 for (k=0;k<constraints_n[i];k++) { 5027 PetscInt row = total_counts+k; 5028 PetscScalar *vals; 5029 5030 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5031 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5032 } 5033 total_counts += constraints_n[i]; 5034 } 5035 } 5036 /* assembling */ 5037 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5038 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5039 5040 /* 5041 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5042 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5043 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5044 */ 5045 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5046 if (pcbddc->use_change_of_basis) { 5047 /* dual and primal dofs on a single cc */ 5048 PetscInt dual_dofs,primal_dofs; 5049 /* working stuff for GEQRF */ 5050 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5051 PetscBLASInt lqr_work; 5052 /* working stuff for UNGQR */ 5053 PetscScalar *gqr_work,lgqr_work_t; 5054 PetscBLASInt lgqr_work; 5055 /* working stuff for TRTRS */ 5056 PetscScalar *trs_rhs; 5057 PetscBLASInt Blas_NRHS; 5058 /* pointers for values insertion into change of basis matrix */ 5059 PetscInt *start_rows,*start_cols; 5060 PetscScalar *start_vals; 5061 /* working stuff for values insertion */ 5062 PetscBT is_primal; 5063 PetscInt *aux_primal_numbering_B; 5064 /* matrix sizes */ 5065 PetscInt global_size,local_size; 5066 /* temporary change of basis */ 5067 Mat localChangeOfBasisMatrix; 5068 /* extra space for debugging */ 5069 PetscScalar *dbg_work; 5070 5071 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5072 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5073 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5074 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5075 /* nonzeros for local mat */ 5076 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5077 if (!pcbddc->benign_change || pcbddc->fake_change) { 5078 for (i=0;i<pcis->n;i++) nnz[i]=1; 5079 } else { 5080 const PetscInt *ii; 5081 PetscInt n; 5082 PetscBool flg_row; 5083 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5084 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5085 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5086 } 5087 for (i=n_vertices;i<total_counts_cc;i++) { 5088 if (PetscBTLookup(change_basis,i)) { 5089 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5090 if (PetscBTLookup(qr_needed_idx,i)) { 5091 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5092 } else { 5093 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5094 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5095 } 5096 } 5097 } 5098 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5099 ierr = PetscFree(nnz);CHKERRQ(ierr); 5100 /* Set interior change in the matrix */ 5101 if (!pcbddc->benign_change || pcbddc->fake_change) { 5102 for (i=0;i<pcis->n;i++) { 5103 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5104 } 5105 } else { 5106 const PetscInt *ii,*jj; 5107 PetscScalar *aa; 5108 PetscInt n; 5109 PetscBool flg_row; 5110 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5111 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5112 for (i=0;i<n;i++) { 5113 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5114 } 5115 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5116 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5117 } 5118 5119 if (pcbddc->dbg_flag) { 5120 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5121 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5122 } 5123 5124 5125 /* Now we loop on the constraints which need a change of basis */ 5126 /* 5127 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 5128 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 5129 5130 Basic blocks of change of basis matrix T computed by 5131 5132 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 5133 5134 | 1 0 ... 0 s_1/S | 5135 | 0 1 ... 0 s_2/S | 5136 | ... | 5137 | 0 ... 1 s_{n-1}/S | 5138 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 5139 5140 with S = \sum_{i=1}^n s_i^2 5141 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 5142 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 5143 5144 - QR decomposition of constraints otherwise 5145 */ 5146 if (qr_needed) { 5147 /* space to store Q */ 5148 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 5149 /* first we issue queries for optimal work */ 5150 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5151 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5152 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5153 lqr_work = -1; 5154 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 5155 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 5156 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 5157 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 5158 lgqr_work = -1; 5159 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 5160 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 5161 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 5162 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5163 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 5164 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 5165 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 5166 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 5167 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 5168 /* array to store scaling factors for reflectors */ 5169 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 5170 /* array to store rhs and solution of triangular solver */ 5171 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 5172 /* allocating workspace for check */ 5173 if (pcbddc->dbg_flag) { 5174 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 5175 } 5176 } 5177 /* array to store whether a node is primal or not */ 5178 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 5179 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 5180 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 5181 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 5182 for (i=0;i<total_primal_vertices;i++) { 5183 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 5184 } 5185 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 5186 5187 /* loop on constraints and see whether or not they need a change of basis and compute it */ 5188 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 5189 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 5190 if (PetscBTLookup(change_basis,total_counts)) { 5191 /* get constraint info */ 5192 primal_dofs = constraints_n[total_counts]; 5193 dual_dofs = size_of_constraint-primal_dofs; 5194 5195 if (pcbddc->dbg_flag) { 5196 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 5197 } 5198 5199 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 5200 5201 /* copy quadrature constraints for change of basis check */ 5202 if (pcbddc->dbg_flag) { 5203 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5204 } 5205 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 5206 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5207 5208 /* compute QR decomposition of constraints */ 5209 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5210 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5211 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5212 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5213 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 5214 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 5215 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5216 5217 /* explictly compute R^-T */ 5218 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 5219 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 5220 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5221 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 5222 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5223 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5224 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5225 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 5226 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 5227 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5228 5229 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 5230 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5231 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5232 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5233 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5234 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5235 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 5236 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 5237 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5238 5239 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 5240 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 5241 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 5242 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5243 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 5244 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 5245 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5246 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 5247 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5248 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5249 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 5250 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5251 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 5252 5253 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 5254 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 5255 /* insert cols for primal dofs */ 5256 for (j=0;j<primal_dofs;j++) { 5257 start_vals = &qr_basis[j*size_of_constraint]; 5258 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5259 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5260 } 5261 /* insert cols for dual dofs */ 5262 for (j=0,k=0;j<dual_dofs;k++) { 5263 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 5264 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 5265 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5266 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 5267 j++; 5268 } 5269 } 5270 5271 /* check change of basis */ 5272 if (pcbddc->dbg_flag) { 5273 PetscInt ii,jj; 5274 PetscBool valid_qr=PETSC_TRUE; 5275 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 5276 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5277 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 5278 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5279 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 5280 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 5281 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5282 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 5283 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5284 for (jj=0;jj<size_of_constraint;jj++) { 5285 for (ii=0;ii<primal_dofs;ii++) { 5286 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 5287 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 5288 } 5289 } 5290 if (!valid_qr) { 5291 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 5292 for (jj=0;jj<size_of_constraint;jj++) { 5293 for (ii=0;ii<primal_dofs;ii++) { 5294 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 5295 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5296 } 5297 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 5298 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 5299 } 5300 } 5301 } 5302 } else { 5303 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 5304 } 5305 } 5306 } else { /* simple transformation block */ 5307 PetscInt row,col; 5308 PetscScalar val,norm; 5309 5310 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5311 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 5312 for (j=0;j<size_of_constraint;j++) { 5313 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 5314 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 5315 if (!PetscBTLookup(is_primal,row_B)) { 5316 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 5317 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 5318 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 5319 } else { 5320 for (k=0;k<size_of_constraint;k++) { 5321 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 5322 if (row != col) { 5323 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 5324 } else { 5325 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 5326 } 5327 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 5328 } 5329 } 5330 } 5331 if (pcbddc->dbg_flag) { 5332 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 5333 } 5334 } 5335 } else { 5336 if (pcbddc->dbg_flag) { 5337 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 5338 } 5339 } 5340 } 5341 5342 /* free workspace */ 5343 if (qr_needed) { 5344 if (pcbddc->dbg_flag) { 5345 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 5346 } 5347 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 5348 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 5349 ierr = PetscFree(qr_work);CHKERRQ(ierr); 5350 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 5351 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 5352 } 5353 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 5354 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5355 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5356 5357 /* assembling of global change of variable */ 5358 if (!pcbddc->fake_change) { 5359 Mat tmat; 5360 PetscInt bs; 5361 5362 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 5363 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 5364 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5365 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 5366 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5367 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5368 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 5369 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 5370 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 5371 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 5372 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5373 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5374 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5375 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5376 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5377 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5378 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5379 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 5380 5381 /* check */ 5382 if (pcbddc->dbg_flag) { 5383 PetscReal error; 5384 Vec x,x_change; 5385 5386 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 5387 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 5388 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 5389 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 5390 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5391 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5392 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 5393 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5394 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5395 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 5396 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 5397 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 5398 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5399 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 5400 ierr = VecDestroy(&x);CHKERRQ(ierr); 5401 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5402 } 5403 /* adapt sub_schurs computed (if any) */ 5404 if (pcbddc->use_deluxe_scaling) { 5405 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5406 5407 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 5408 if (sub_schurs && sub_schurs->S_Ej_all) { 5409 Mat S_new,tmat; 5410 IS is_all_N,is_V_Sall = NULL; 5411 5412 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 5413 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 5414 if (pcbddc->deluxe_zerorows) { 5415 ISLocalToGlobalMapping NtoSall; 5416 IS is_V; 5417 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 5418 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 5419 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 5420 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 5421 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 5422 } 5423 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 5424 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5425 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 5426 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5427 if (pcbddc->deluxe_zerorows) { 5428 const PetscScalar *array; 5429 const PetscInt *idxs_V,*idxs_all; 5430 PetscInt i,n_V; 5431 5432 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5433 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 5434 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5435 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5436 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 5437 for (i=0;i<n_V;i++) { 5438 PetscScalar val; 5439 PetscInt idx; 5440 5441 idx = idxs_V[i]; 5442 val = array[idxs_all[idxs_V[i]]]; 5443 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 5444 } 5445 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5446 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5447 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 5448 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 5449 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 5450 } 5451 sub_schurs->S_Ej_all = S_new; 5452 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5453 if (sub_schurs->sum_S_Ej_all) { 5454 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 5455 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 5456 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 5457 if (pcbddc->deluxe_zerorows) { 5458 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 5459 } 5460 sub_schurs->sum_S_Ej_all = S_new; 5461 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 5462 } 5463 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 5464 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5465 } 5466 /* destroy any change of basis context in sub_schurs */ 5467 if (sub_schurs && sub_schurs->change) { 5468 PetscInt i; 5469 5470 for (i=0;i<sub_schurs->n_subs;i++) { 5471 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 5472 } 5473 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 5474 } 5475 } 5476 if (pcbddc->switch_static) { /* need to save the local change */ 5477 pcbddc->switch_static_change = localChangeOfBasisMatrix; 5478 } else { 5479 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 5480 } 5481 /* determine if any process has changed the pressures locally */ 5482 pcbddc->change_interior = pcbddc->benign_have_null; 5483 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 5484 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5485 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 5486 pcbddc->use_qr_single = qr_needed; 5487 } 5488 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 5489 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 5490 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 5491 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 5492 } else { 5493 Mat benign_global = NULL; 5494 if (pcbddc->benign_have_null) { 5495 Mat tmat; 5496 5497 pcbddc->change_interior = PETSC_TRUE; 5498 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5499 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5500 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5501 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5502 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 5503 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5504 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5505 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 5506 if (pcbddc->benign_change) { 5507 Mat M; 5508 5509 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 5510 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 5511 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 5512 ierr = MatDestroy(&M);CHKERRQ(ierr); 5513 } else { 5514 Mat eye; 5515 PetscScalar *array; 5516 5517 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5518 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 5519 for (i=0;i<pcis->n;i++) { 5520 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 5521 } 5522 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5523 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5524 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5525 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 5526 ierr = MatDestroy(&eye);CHKERRQ(ierr); 5527 } 5528 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 5529 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 5530 } 5531 if (pcbddc->user_ChangeOfBasisMatrix) { 5532 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5533 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 5534 } else if (pcbddc->benign_have_null) { 5535 pcbddc->ChangeOfBasisMatrix = benign_global; 5536 } 5537 } 5538 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 5539 IS is_global; 5540 const PetscInt *gidxs; 5541 5542 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5543 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 5544 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 5545 ierr = MatGetSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 5546 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 5547 } 5548 } 5549 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 5550 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 5551 } 5552 5553 if (!pcbddc->fake_change) { 5554 /* add pressure dofs to set of primal nodes for numbering purposes */ 5555 for (i=0;i<pcbddc->benign_n;i++) { 5556 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 5557 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 5558 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 5559 pcbddc->local_primal_size_cc++; 5560 pcbddc->local_primal_size++; 5561 } 5562 5563 /* check if a new primal space has been introduced (also take into account benign trick) */ 5564 pcbddc->new_primal_space_local = PETSC_TRUE; 5565 if (olocal_primal_size == pcbddc->local_primal_size) { 5566 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5567 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5568 if (!pcbddc->new_primal_space_local) { 5569 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 5570 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 5571 } 5572 } 5573 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 5574 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5575 } 5576 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 5577 5578 /* flush dbg viewer */ 5579 if (pcbddc->dbg_flag) { 5580 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5581 } 5582 5583 /* free workspace */ 5584 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 5585 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 5586 if (!pcbddc->adaptive_selection) { 5587 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 5588 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 5589 } else { 5590 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 5591 pcbddc->adaptive_constraints_idxs_ptr, 5592 pcbddc->adaptive_constraints_data_ptr, 5593 pcbddc->adaptive_constraints_idxs, 5594 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 5595 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 5596 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 5597 } 5598 PetscFunctionReturn(0); 5599 } 5600 5601 #undef __FUNCT__ 5602 #define __FUNCT__ "PCBDDCAnalyzeInterface" 5603 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 5604 { 5605 ISLocalToGlobalMapping map; 5606 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5607 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 5608 PetscInt ierr,i,N; 5609 5610 PetscFunctionBegin; 5611 if (pcbddc->graphanalyzed && !pcbddc->recompute_topography) PetscFunctionReturn(0); 5612 /* Reset previously computed graph */ 5613 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 5614 /* Init local Graph struct */ 5615 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 5616 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 5617 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 5618 5619 /* Check validity of the csr graph passed in by the user */ 5620 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 5621 5622 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 5623 if ( (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) && pcbddc->use_local_adj) { 5624 PetscInt *xadj,*adjncy; 5625 PetscInt nvtxs; 5626 PetscBool flg_row=PETSC_FALSE; 5627 5628 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5629 if (flg_row) { 5630 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 5631 pcbddc->computed_rowadj = PETSC_TRUE; 5632 } 5633 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 5634 } 5635 if (pcbddc->dbg_flag) { 5636 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5637 } 5638 5639 /* Setup of Graph */ 5640 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 5641 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 5642 5643 /* attach info on disconnected subdomains if present */ 5644 if (pcbddc->n_local_subs) { 5645 PetscInt *local_subs; 5646 5647 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 5648 for (i=0;i<pcbddc->n_local_subs;i++) { 5649 const PetscInt *idxs; 5650 PetscInt nl,j; 5651 5652 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 5653 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 5654 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 5655 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 5656 } 5657 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 5658 pcbddc->mat_graph->local_subs = local_subs; 5659 } 5660 5661 /* Graph's connected components analysis */ 5662 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 5663 5664 /* set flag indicating analysis has been done */ 5665 pcbddc->graphanalyzed = PETSC_TRUE; 5666 PetscFunctionReturn(0); 5667 } 5668 5669 #undef __FUNCT__ 5670 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 5671 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 5672 { 5673 PetscInt i,j; 5674 PetscScalar *alphas; 5675 PetscErrorCode ierr; 5676 5677 PetscFunctionBegin; 5678 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 5679 for (i=0;i<n;i++) { 5680 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 5681 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 5682 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 5683 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 5684 } 5685 ierr = PetscFree(alphas);CHKERRQ(ierr); 5686 PetscFunctionReturn(0); 5687 } 5688 5689 #undef __FUNCT__ 5690 #define __FUNCT__ "MatISGetSubassemblingPattern" 5691 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 5692 { 5693 Mat A; 5694 PetscInt n_neighs,*neighs,*n_shared,**shared; 5695 PetscMPIInt size,rank,color; 5696 PetscInt *xadj,*adjncy; 5697 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 5698 PetscInt im_active,active_procs,n,i,j,local_size,threshold = 2; 5699 PetscInt void_procs,*procs_candidates = NULL; 5700 PetscInt xadj_count, *count; 5701 PetscBool ismatis,use_vwgt=PETSC_FALSE; 5702 PetscSubcomm psubcomm; 5703 MPI_Comm subcomm; 5704 PetscErrorCode ierr; 5705 5706 PetscFunctionBegin; 5707 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5708 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5709 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5710 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 5711 PetscValidLogicalCollectiveInt(mat,redprocs,3); 5712 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 5713 5714 if (have_void) *have_void = PETSC_FALSE; 5715 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 5716 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 5717 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 5718 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 5719 im_active = !!(n); 5720 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5721 void_procs = size - active_procs; 5722 /* get ranks of of non-active processes in mat communicator */ 5723 if (void_procs) { 5724 PetscInt ncand; 5725 5726 if (have_void) *have_void = PETSC_TRUE; 5727 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 5728 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 5729 for (i=0,ncand=0;i<size;i++) { 5730 if (!procs_candidates[i]) { 5731 procs_candidates[ncand++] = i; 5732 } 5733 } 5734 /* force n_subdomains to be not greater that the number of non-active processes */ 5735 *n_subdomains = PetscMin(void_procs,*n_subdomains); 5736 } 5737 5738 /* number of subdomains requested greater than active processes -> just shift the matrix 5739 number of subdomains requested 1 -> send to master or first candidate in voids */ 5740 if (active_procs < *n_subdomains || *n_subdomains == 1) { 5741 PetscInt issize,isidx,dest; 5742 if (*n_subdomains == 1) dest = 0; 5743 else dest = rank; 5744 if (im_active) { 5745 issize = 1; 5746 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5747 isidx = procs_candidates[dest]; 5748 } else { 5749 isidx = dest; 5750 } 5751 } else { 5752 issize = 0; 5753 isidx = -1; 5754 } 5755 *n_subdomains = active_procs; 5756 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 5757 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5758 PetscFunctionReturn(0); 5759 } 5760 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 5761 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 5762 threshold = PetscMax(threshold,2); 5763 5764 /* Get info on mapping */ 5765 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 5766 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 5767 5768 /* build local CSR graph of subdomains' connectivity */ 5769 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 5770 xadj[0] = 0; 5771 xadj[1] = PetscMax(n_neighs-1,0); 5772 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 5773 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 5774 ierr = PetscCalloc1(local_size,&count);CHKERRQ(ierr); 5775 for (i=1;i<n_neighs;i++) 5776 for (j=0;j<n_shared[i];j++) 5777 count[shared[i][j]] += 1; 5778 5779 xadj_count = 0; 5780 for (i=1;i<n_neighs;i++) { 5781 for (j=0;j<n_shared[i];j++) { 5782 if (count[shared[i][j]] < threshold) { 5783 adjncy[xadj_count] = neighs[i]; 5784 adjncy_wgt[xadj_count] = n_shared[i]; 5785 xadj_count++; 5786 break; 5787 } 5788 } 5789 } 5790 xadj[1] = xadj_count; 5791 ierr = PetscFree(count);CHKERRQ(ierr); 5792 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 5793 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5794 5795 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 5796 5797 /* Restrict work on active processes only */ 5798 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 5799 if (void_procs) { 5800 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 5801 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 5802 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 5803 subcomm = PetscSubcommChild(psubcomm); 5804 } else { 5805 psubcomm = NULL; 5806 subcomm = PetscObjectComm((PetscObject)mat); 5807 } 5808 5809 v_wgt = NULL; 5810 if (!color) { 5811 ierr = PetscFree(xadj);CHKERRQ(ierr); 5812 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5813 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5814 } else { 5815 Mat subdomain_adj; 5816 IS new_ranks,new_ranks_contig; 5817 MatPartitioning partitioner; 5818 PetscInt rstart=0,rend=0; 5819 PetscInt *is_indices,*oldranks; 5820 PetscMPIInt size; 5821 PetscBool aggregate; 5822 5823 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 5824 if (void_procs) { 5825 PetscInt prank = rank; 5826 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 5827 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 5828 for (i=0;i<xadj[1];i++) { 5829 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 5830 } 5831 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 5832 } else { 5833 oldranks = NULL; 5834 } 5835 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 5836 if (aggregate) { /* TODO: all this part could be made more efficient */ 5837 PetscInt lrows,row,ncols,*cols; 5838 PetscMPIInt nrank; 5839 PetscScalar *vals; 5840 5841 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 5842 lrows = 0; 5843 if (nrank<redprocs) { 5844 lrows = size/redprocs; 5845 if (nrank<size%redprocs) lrows++; 5846 } 5847 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 5848 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 5849 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5850 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5851 row = nrank; 5852 ncols = xadj[1]-xadj[0]; 5853 cols = adjncy; 5854 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 5855 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 5856 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5857 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5858 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5859 ierr = PetscFree(xadj);CHKERRQ(ierr); 5860 ierr = PetscFree(adjncy);CHKERRQ(ierr); 5861 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 5862 ierr = PetscFree(vals);CHKERRQ(ierr); 5863 if (use_vwgt) { 5864 Vec v; 5865 const PetscScalar *array; 5866 PetscInt nl; 5867 5868 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 5869 ierr = VecSetValue(v,row,(PetscScalar)local_size,INSERT_VALUES);CHKERRQ(ierr); 5870 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 5871 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 5872 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 5873 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 5874 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 5875 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 5876 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 5877 ierr = VecDestroy(&v);CHKERRQ(ierr); 5878 } 5879 } else { 5880 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 5881 if (use_vwgt) { 5882 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 5883 v_wgt[0] = local_size; 5884 } 5885 } 5886 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 5887 5888 /* Partition */ 5889 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 5890 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 5891 if (v_wgt) { 5892 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 5893 } 5894 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 5895 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 5896 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 5897 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 5898 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 5899 5900 /* renumber new_ranks to avoid "holes" in new set of processors */ 5901 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 5902 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 5903 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5904 if (!aggregate) { 5905 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5906 #if defined(PETSC_USE_DEBUG) 5907 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5908 #endif 5909 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 5910 } else if (oldranks) { 5911 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 5912 } else { 5913 ranks_send_to_idx[0] = is_indices[0]; 5914 } 5915 } else { 5916 PetscInt idxs[1]; 5917 PetscMPIInt tag; 5918 MPI_Request *reqs; 5919 5920 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 5921 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 5922 for (i=rstart;i<rend;i++) { 5923 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 5924 } 5925 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 5926 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 5927 ierr = PetscFree(reqs);CHKERRQ(ierr); 5928 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 5929 #if defined(PETSC_USE_DEBUG) 5930 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 5931 #endif 5932 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 5933 } else if (oldranks) { 5934 ranks_send_to_idx[0] = oldranks[idxs[0]]; 5935 } else { 5936 ranks_send_to_idx[0] = idxs[0]; 5937 } 5938 } 5939 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5940 /* clean up */ 5941 ierr = PetscFree(oldranks);CHKERRQ(ierr); 5942 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 5943 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 5944 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 5945 } 5946 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 5947 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 5948 5949 /* assemble parallel IS for sends */ 5950 i = 1; 5951 if (!color) i=0; 5952 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 5953 PetscFunctionReturn(0); 5954 } 5955 5956 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 5957 5958 #undef __FUNCT__ 5959 #define __FUNCT__ "MatISSubassemble" 5960 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 5961 { 5962 Mat local_mat; 5963 IS is_sends_internal; 5964 PetscInt rows,cols,new_local_rows; 5965 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 5966 PetscBool ismatis,isdense,newisdense,destroy_mat; 5967 ISLocalToGlobalMapping l2gmap; 5968 PetscInt* l2gmap_indices; 5969 const PetscInt* is_indices; 5970 MatType new_local_type; 5971 /* buffers */ 5972 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 5973 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 5974 PetscInt *recv_buffer_idxs_local; 5975 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 5976 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 5977 /* MPI */ 5978 MPI_Comm comm,comm_n; 5979 PetscSubcomm subcomm; 5980 PetscMPIInt n_sends,n_recvs,commsize; 5981 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 5982 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 5983 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 5984 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 5985 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 5986 PetscErrorCode ierr; 5987 5988 PetscFunctionBegin; 5989 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 5990 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 5991 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 5992 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 5993 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 5994 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 5995 PetscValidLogicalCollectiveBool(mat,reuse,6); 5996 PetscValidLogicalCollectiveInt(mat,nis,8); 5997 PetscValidLogicalCollectiveInt(mat,nvecs,10); 5998 if (nvecs) { 5999 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6000 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6001 } 6002 /* further checks */ 6003 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6004 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6005 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6006 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6007 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6008 if (reuse && *mat_n) { 6009 PetscInt mrows,mcols,mnrows,mncols; 6010 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6011 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6012 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6013 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6014 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6015 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6016 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6017 } 6018 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6019 PetscValidLogicalCollectiveInt(mat,bs,0); 6020 6021 /* prepare IS for sending if not provided */ 6022 if (!is_sends) { 6023 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6024 ierr = MatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6025 } else { 6026 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6027 is_sends_internal = is_sends; 6028 } 6029 6030 /* get comm */ 6031 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6032 6033 /* compute number of sends */ 6034 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6035 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6036 6037 /* compute number of receives */ 6038 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6039 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6040 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6041 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6042 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6043 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6044 ierr = PetscFree(iflags);CHKERRQ(ierr); 6045 6046 /* restrict comm if requested */ 6047 subcomm = 0; 6048 destroy_mat = PETSC_FALSE; 6049 if (restrict_comm) { 6050 PetscMPIInt color,subcommsize; 6051 6052 color = 0; 6053 if (restrict_full) { 6054 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6055 } else { 6056 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6057 } 6058 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6059 subcommsize = commsize - subcommsize; 6060 /* check if reuse has been requested */ 6061 if (reuse) { 6062 if (*mat_n) { 6063 PetscMPIInt subcommsize2; 6064 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6065 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6066 comm_n = PetscObjectComm((PetscObject)*mat_n); 6067 } else { 6068 comm_n = PETSC_COMM_SELF; 6069 } 6070 } else { /* MAT_INITIAL_MATRIX */ 6071 PetscMPIInt rank; 6072 6073 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6074 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6075 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6076 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6077 comm_n = PetscSubcommChild(subcomm); 6078 } 6079 /* flag to destroy *mat_n if not significative */ 6080 if (color) destroy_mat = PETSC_TRUE; 6081 } else { 6082 comm_n = comm; 6083 } 6084 6085 /* prepare send/receive buffers */ 6086 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6087 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6088 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6089 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6090 if (nis) { 6091 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6092 } 6093 6094 /* Get data from local matrices */ 6095 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6096 /* TODO: See below some guidelines on how to prepare the local buffers */ 6097 /* 6098 send_buffer_vals should contain the raw values of the local matrix 6099 send_buffer_idxs should contain: 6100 - MatType_PRIVATE type 6101 - PetscInt size_of_l2gmap 6102 - PetscInt global_row_indices[size_of_l2gmap] 6103 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6104 */ 6105 else { 6106 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6107 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6108 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6109 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6110 send_buffer_idxs[1] = i; 6111 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6112 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6113 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6114 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 6115 for (i=0;i<n_sends;i++) { 6116 ilengths_vals[is_indices[i]] = len*len; 6117 ilengths_idxs[is_indices[i]] = len+2; 6118 } 6119 } 6120 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 6121 /* additional is (if any) */ 6122 if (nis) { 6123 PetscMPIInt psum; 6124 PetscInt j; 6125 for (j=0,psum=0;j<nis;j++) { 6126 PetscInt plen; 6127 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6128 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 6129 psum += len+1; /* indices + lenght */ 6130 } 6131 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 6132 for (j=0,psum=0;j<nis;j++) { 6133 PetscInt plen; 6134 const PetscInt *is_array_idxs; 6135 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 6136 send_buffer_idxs_is[psum] = plen; 6137 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6138 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 6139 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 6140 psum += plen+1; /* indices + lenght */ 6141 } 6142 for (i=0;i<n_sends;i++) { 6143 ilengths_idxs_is[is_indices[i]] = psum; 6144 } 6145 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 6146 } 6147 6148 buf_size_idxs = 0; 6149 buf_size_vals = 0; 6150 buf_size_idxs_is = 0; 6151 buf_size_vecs = 0; 6152 for (i=0;i<n_recvs;i++) { 6153 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6154 buf_size_vals += (PetscInt)olengths_vals[i]; 6155 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 6156 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 6157 } 6158 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 6159 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 6160 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 6161 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 6162 6163 /* get new tags for clean communications */ 6164 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 6165 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 6166 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 6167 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 6168 6169 /* allocate for requests */ 6170 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 6171 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 6172 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 6173 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 6174 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 6175 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 6176 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 6177 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 6178 6179 /* communications */ 6180 ptr_idxs = recv_buffer_idxs; 6181 ptr_vals = recv_buffer_vals; 6182 ptr_idxs_is = recv_buffer_idxs_is; 6183 ptr_vecs = recv_buffer_vecs; 6184 for (i=0;i<n_recvs;i++) { 6185 source_dest = onodes[i]; 6186 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 6187 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 6188 ptr_idxs += olengths_idxs[i]; 6189 ptr_vals += olengths_vals[i]; 6190 if (nis) { 6191 source_dest = onodes_is[i]; 6192 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 6193 ptr_idxs_is += olengths_idxs_is[i]; 6194 } 6195 if (nvecs) { 6196 source_dest = onodes[i]; 6197 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 6198 ptr_vecs += olengths_idxs[i]-2; 6199 } 6200 } 6201 for (i=0;i<n_sends;i++) { 6202 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 6203 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 6204 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 6205 if (nis) { 6206 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 6207 } 6208 if (nvecs) { 6209 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6210 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 6211 } 6212 } 6213 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6214 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 6215 6216 /* assemble new l2g map */ 6217 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6218 ptr_idxs = recv_buffer_idxs; 6219 new_local_rows = 0; 6220 for (i=0;i<n_recvs;i++) { 6221 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6222 ptr_idxs += olengths_idxs[i]; 6223 } 6224 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 6225 ptr_idxs = recv_buffer_idxs; 6226 new_local_rows = 0; 6227 for (i=0;i<n_recvs;i++) { 6228 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 6229 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 6230 ptr_idxs += olengths_idxs[i]; 6231 } 6232 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 6233 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 6234 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 6235 6236 /* infer new local matrix type from received local matrices type */ 6237 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 6238 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 6239 if (n_recvs) { 6240 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 6241 ptr_idxs = recv_buffer_idxs; 6242 for (i=0;i<n_recvs;i++) { 6243 if ((PetscInt)new_local_type_private != *ptr_idxs) { 6244 new_local_type_private = MATAIJ_PRIVATE; 6245 break; 6246 } 6247 ptr_idxs += olengths_idxs[i]; 6248 } 6249 switch (new_local_type_private) { 6250 case MATDENSE_PRIVATE: 6251 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 6252 new_local_type = MATSEQAIJ; 6253 bs = 1; 6254 } else { /* if I receive only 1 dense matrix */ 6255 new_local_type = MATSEQDENSE; 6256 bs = 1; 6257 } 6258 break; 6259 case MATAIJ_PRIVATE: 6260 new_local_type = MATSEQAIJ; 6261 bs = 1; 6262 break; 6263 case MATBAIJ_PRIVATE: 6264 new_local_type = MATSEQBAIJ; 6265 break; 6266 case MATSBAIJ_PRIVATE: 6267 new_local_type = MATSEQSBAIJ; 6268 break; 6269 default: 6270 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 6271 break; 6272 } 6273 } else { /* by default, new_local_type is seqdense */ 6274 new_local_type = MATSEQDENSE; 6275 bs = 1; 6276 } 6277 6278 /* create MATIS object if needed */ 6279 if (!reuse) { 6280 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 6281 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6282 } else { 6283 /* it also destroys the local matrices */ 6284 if (*mat_n) { 6285 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 6286 } else { /* this is a fake object */ 6287 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 6288 } 6289 } 6290 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 6291 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 6292 6293 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6294 6295 /* Global to local map of received indices */ 6296 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 6297 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 6298 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 6299 6300 /* restore attributes -> type of incoming data and its size */ 6301 buf_size_idxs = 0; 6302 for (i=0;i<n_recvs;i++) { 6303 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 6304 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 6305 buf_size_idxs += (PetscInt)olengths_idxs[i]; 6306 } 6307 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 6308 6309 /* set preallocation */ 6310 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 6311 if (!newisdense) { 6312 PetscInt *new_local_nnz=0; 6313 6314 ptr_idxs = recv_buffer_idxs_local; 6315 if (n_recvs) { 6316 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 6317 } 6318 for (i=0;i<n_recvs;i++) { 6319 PetscInt j; 6320 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 6321 for (j=0;j<*(ptr_idxs+1);j++) { 6322 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 6323 } 6324 } else { 6325 /* TODO */ 6326 } 6327 ptr_idxs += olengths_idxs[i]; 6328 } 6329 if (new_local_nnz) { 6330 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 6331 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 6332 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 6333 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6334 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 6335 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 6336 } else { 6337 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6338 } 6339 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 6340 } else { 6341 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 6342 } 6343 6344 /* set values */ 6345 ptr_vals = recv_buffer_vals; 6346 ptr_idxs = recv_buffer_idxs_local; 6347 for (i=0;i<n_recvs;i++) { 6348 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 6349 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 6350 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 6351 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6352 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 6353 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 6354 } else { 6355 /* TODO */ 6356 } 6357 ptr_idxs += olengths_idxs[i]; 6358 ptr_vals += olengths_vals[i]; 6359 } 6360 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6361 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6362 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6363 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6364 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 6365 6366 #if 0 6367 if (!restrict_comm) { /* check */ 6368 Vec lvec,rvec; 6369 PetscReal infty_error; 6370 6371 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 6372 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 6373 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 6374 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 6375 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 6376 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 6377 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 6378 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 6379 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 6380 } 6381 #endif 6382 6383 /* assemble new additional is (if any) */ 6384 if (nis) { 6385 PetscInt **temp_idxs,*count_is,j,psum; 6386 6387 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6388 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 6389 ptr_idxs = recv_buffer_idxs_is; 6390 psum = 0; 6391 for (i=0;i<n_recvs;i++) { 6392 for (j=0;j<nis;j++) { 6393 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6394 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 6395 psum += plen; 6396 ptr_idxs += plen+1; /* shift pointer to received data */ 6397 } 6398 } 6399 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 6400 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 6401 for (i=1;i<nis;i++) { 6402 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 6403 } 6404 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 6405 ptr_idxs = recv_buffer_idxs_is; 6406 for (i=0;i<n_recvs;i++) { 6407 for (j=0;j<nis;j++) { 6408 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 6409 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 6410 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 6411 ptr_idxs += plen+1; /* shift pointer to received data */ 6412 } 6413 } 6414 for (i=0;i<nis;i++) { 6415 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6416 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 6417 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6418 } 6419 ierr = PetscFree(count_is);CHKERRQ(ierr); 6420 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 6421 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 6422 } 6423 /* free workspace */ 6424 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 6425 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6426 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 6427 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6428 if (isdense) { 6429 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6430 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6431 } else { 6432 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 6433 } 6434 if (nis) { 6435 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6436 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 6437 } 6438 6439 if (nvecs) { 6440 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6441 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6442 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6443 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6444 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 6445 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 6446 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 6447 /* set values */ 6448 ptr_vals = recv_buffer_vecs; 6449 ptr_idxs = recv_buffer_idxs_local; 6450 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6451 for (i=0;i<n_recvs;i++) { 6452 PetscInt j; 6453 for (j=0;j<*(ptr_idxs+1);j++) { 6454 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 6455 } 6456 ptr_idxs += olengths_idxs[i]; 6457 ptr_vals += olengths_idxs[i]-2; 6458 } 6459 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 6460 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 6461 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 6462 } 6463 6464 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 6465 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 6466 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 6467 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 6468 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 6469 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 6470 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 6471 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 6472 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 6473 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 6474 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 6475 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 6476 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 6477 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 6478 ierr = PetscFree(onodes);CHKERRQ(ierr); 6479 if (nis) { 6480 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 6481 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 6482 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 6483 } 6484 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 6485 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 6486 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 6487 for (i=0;i<nis;i++) { 6488 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6489 } 6490 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 6491 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 6492 } 6493 *mat_n = NULL; 6494 } 6495 PetscFunctionReturn(0); 6496 } 6497 6498 /* temporary hack into ksp private data structure */ 6499 #include <petsc/private/kspimpl.h> 6500 6501 #undef __FUNCT__ 6502 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 6503 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 6504 { 6505 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6506 PC_IS *pcis = (PC_IS*)pc->data; 6507 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 6508 Mat coarsedivudotp = NULL; 6509 MatNullSpace CoarseNullSpace = NULL; 6510 ISLocalToGlobalMapping coarse_islg; 6511 IS coarse_is,*isarray; 6512 PetscInt i,im_active=-1,active_procs=-1; 6513 PetscInt nis,nisdofs,nisneu,nisvert; 6514 PC pc_temp; 6515 PCType coarse_pc_type; 6516 KSPType coarse_ksp_type; 6517 PetscBool multilevel_requested,multilevel_allowed; 6518 PetscBool isredundant,isbddc,isnn,coarse_reuse; 6519 Mat t_coarse_mat_is; 6520 PetscInt ncoarse; 6521 PetscBool compute_vecs = PETSC_FALSE; 6522 PetscScalar *array; 6523 MatReuse coarse_mat_reuse; 6524 PetscBool restr, full_restr, have_void; 6525 PetscErrorCode ierr; 6526 6527 PetscFunctionBegin; 6528 /* Assign global numbering to coarse dofs */ 6529 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 6530 PetscInt ocoarse_size; 6531 compute_vecs = PETSC_TRUE; 6532 ocoarse_size = pcbddc->coarse_size; 6533 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 6534 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 6535 /* see if we can avoid some work */ 6536 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 6537 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 6538 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 6539 PC pc; 6540 PetscBool isbddc; 6541 6542 /* temporary workaround since PCBDDC does not have a reset method so far */ 6543 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 6544 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 6545 if (isbddc) { 6546 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 6547 } else { 6548 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 6549 } 6550 coarse_reuse = PETSC_FALSE; 6551 } else { /* we can safely reuse already computed coarse matrix */ 6552 coarse_reuse = PETSC_TRUE; 6553 } 6554 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 6555 coarse_reuse = PETSC_FALSE; 6556 } 6557 /* reset any subassembling information */ 6558 if (!coarse_reuse || pcbddc->recompute_topography) { 6559 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6560 } 6561 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 6562 coarse_reuse = PETSC_TRUE; 6563 } 6564 /* assemble coarse matrix */ 6565 if (coarse_reuse && pcbddc->coarse_ksp) { 6566 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 6567 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 6568 coarse_mat_reuse = MAT_REUSE_MATRIX; 6569 } else { 6570 coarse_mat = NULL; 6571 coarse_mat_reuse = MAT_INITIAL_MATRIX; 6572 } 6573 6574 /* creates temporary l2gmap and IS for coarse indexes */ 6575 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 6576 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 6577 6578 /* creates temporary MATIS object for coarse matrix */ 6579 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 6580 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6581 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 6582 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 6583 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 6584 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 6585 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6586 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6587 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 6588 6589 /* count "active" (i.e. with positive local size) and "void" processes */ 6590 im_active = !!(pcis->n); 6591 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6592 6593 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 6594 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 6595 /* full_restr : just use the receivers from the subassembling pattern */ 6596 coarse_mat_is = NULL; 6597 multilevel_allowed = PETSC_FALSE; 6598 multilevel_requested = PETSC_FALSE; 6599 full_restr = PETSC_TRUE; 6600 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 6601 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 6602 if (multilevel_requested) { 6603 ncoarse = active_procs/pcbddc->coarsening_ratio; 6604 restr = PETSC_FALSE; 6605 full_restr = PETSC_FALSE; 6606 } else { 6607 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 6608 restr = PETSC_TRUE; 6609 full_restr = PETSC_TRUE; 6610 } 6611 if (!pcbddc->coarse_size) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 6612 ncoarse = PetscMax(1,ncoarse); 6613 if (!pcbddc->coarse_subassembling) { 6614 if (pcbddc->coarsening_ratio > 1) { 6615 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 6616 } else { 6617 PetscMPIInt size,rank; 6618 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6619 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 6620 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 6621 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 6622 } 6623 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 6624 PetscInt psum; 6625 PetscMPIInt size; 6626 if (pcbddc->coarse_ksp) psum = 1; 6627 else psum = 0; 6628 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6629 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 6630 if (ncoarse < size) have_void = PETSC_TRUE; 6631 } 6632 /* determine if we can go multilevel */ 6633 if (multilevel_requested) { 6634 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 6635 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 6636 } 6637 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 6638 6639 /* dump subassembling pattern */ 6640 if (pcbddc->dbg_flag && multilevel_allowed) { 6641 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 6642 } 6643 6644 /* compute dofs splitting and neumann boundaries for coarse dofs */ 6645 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal)) { /* protects from unneded computations */ 6646 PetscInt *tidxs,*tidxs2,nout,tsize,i; 6647 const PetscInt *idxs; 6648 ISLocalToGlobalMapping tmap; 6649 6650 /* create map between primal indices (in local representative ordering) and local primal numbering */ 6651 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 6652 /* allocate space for temporary storage */ 6653 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 6654 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 6655 /* allocate for IS array */ 6656 nisdofs = pcbddc->n_ISForDofsLocal; 6657 nisneu = !!pcbddc->NeumannBoundariesLocal; 6658 nisvert = 0; /* nisvert is not used */ 6659 nis = nisdofs + nisneu + nisvert; 6660 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 6661 /* dofs splitting */ 6662 for (i=0;i<nisdofs;i++) { 6663 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 6664 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 6665 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 6666 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 6667 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 6668 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 6669 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 6670 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 6671 } 6672 /* neumann boundaries */ 6673 if (pcbddc->NeumannBoundariesLocal) { 6674 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 6675 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 6676 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 6677 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 6678 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 6679 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 6680 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 6681 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 6682 } 6683 /* free memory */ 6684 ierr = PetscFree(tidxs);CHKERRQ(ierr); 6685 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 6686 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 6687 } else { 6688 nis = 0; 6689 nisdofs = 0; 6690 nisneu = 0; 6691 nisvert = 0; 6692 isarray = NULL; 6693 } 6694 /* destroy no longer needed map */ 6695 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 6696 6697 /* subassemble */ 6698 if (multilevel_allowed) { 6699 Vec vp[1]; 6700 PetscInt nvecs = 0; 6701 PetscBool reuse,reuser; 6702 6703 if (coarse_mat) reuse = PETSC_TRUE; 6704 else reuse = PETSC_FALSE; 6705 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6706 vp[0] = NULL; 6707 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 6708 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 6709 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 6710 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 6711 nvecs = 1; 6712 6713 if (pcbddc->divudotp) { 6714 Mat B,loc_divudotp; 6715 Vec v,p; 6716 IS dummy; 6717 PetscInt np; 6718 6719 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 6720 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 6721 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 6722 ierr = MatGetSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 6723 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 6724 ierr = VecSet(p,1.);CHKERRQ(ierr); 6725 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 6726 ierr = VecDestroy(&p);CHKERRQ(ierr); 6727 ierr = MatDestroy(&B);CHKERRQ(ierr); 6728 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 6729 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 6730 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 6731 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 6732 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 6733 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 6734 ierr = VecDestroy(&v);CHKERRQ(ierr); 6735 } 6736 } 6737 if (reuser) { 6738 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6739 } else { 6740 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 6741 } 6742 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 6743 PetscScalar *arraym,*arrayv; 6744 PetscInt nl; 6745 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 6746 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 6747 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 6748 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 6749 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 6750 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 6751 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 6752 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 6753 } else { 6754 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 6755 } 6756 } else { 6757 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,0,NULL);CHKERRQ(ierr); 6758 } 6759 if (coarse_mat_is || coarse_mat) { 6760 PetscMPIInt size; 6761 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size); 6762 if (!multilevel_allowed) { 6763 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 6764 } else { 6765 Mat A; 6766 6767 /* if this matrix is present, it means we are not reusing the coarse matrix */ 6768 if (coarse_mat_is) { 6769 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 6770 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 6771 coarse_mat = coarse_mat_is; 6772 } 6773 /* be sure we don't have MatSeqDENSE as local mat */ 6774 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 6775 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 6776 } 6777 } 6778 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 6779 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 6780 6781 /* create local to global scatters for coarse problem */ 6782 if (compute_vecs) { 6783 PetscInt lrows; 6784 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 6785 if (coarse_mat) { 6786 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 6787 } else { 6788 lrows = 0; 6789 } 6790 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 6791 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 6792 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 6793 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 6794 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 6795 } 6796 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 6797 6798 /* set defaults for coarse KSP and PC */ 6799 if (multilevel_allowed) { 6800 coarse_ksp_type = KSPRICHARDSON; 6801 coarse_pc_type = PCBDDC; 6802 } else { 6803 coarse_ksp_type = KSPPREONLY; 6804 coarse_pc_type = PCREDUNDANT; 6805 } 6806 6807 /* print some info if requested */ 6808 if (pcbddc->dbg_flag) { 6809 if (!multilevel_allowed) { 6810 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 6811 if (multilevel_requested) { 6812 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6813 } else if (pcbddc->max_levels) { 6814 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 6815 } 6816 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6817 } 6818 } 6819 6820 /* create the coarse KSP object only once with defaults */ 6821 if (coarse_mat) { 6822 PetscViewer dbg_viewer = NULL; 6823 if (pcbddc->dbg_flag) { 6824 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 6825 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6826 } 6827 if (!pcbddc->coarse_ksp) { 6828 char prefix[256],str_level[16]; 6829 size_t len; 6830 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 6831 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 6832 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 6833 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 6834 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6835 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 6836 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 6837 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6838 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6839 /* prefix */ 6840 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 6841 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 6842 if (!pcbddc->current_level) { 6843 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 6844 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 6845 } else { 6846 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 6847 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 6848 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 6849 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 6850 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 6851 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 6852 } 6853 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 6854 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6855 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 6856 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 6857 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 6858 /* allow user customization */ 6859 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 6860 } 6861 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 6862 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 6863 if (nisdofs) { 6864 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 6865 for (i=0;i<nisdofs;i++) { 6866 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 6867 } 6868 } 6869 if (nisneu) { 6870 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 6871 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 6872 } 6873 if (nisvert) { 6874 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 6875 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 6876 } 6877 6878 /* get some info after set from options */ 6879 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 6880 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 6881 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 6882 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 6883 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 6884 isbddc = PETSC_FALSE; 6885 } 6886 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 6887 if (isredundant) { 6888 KSP inner_ksp; 6889 PC inner_pc; 6890 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 6891 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 6892 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 6893 } 6894 6895 /* parameters which miss an API */ 6896 if (isbddc) { 6897 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 6898 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 6899 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 6900 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 6901 if (pcbddc_coarse->benign_saddle_point) { 6902 Mat coarsedivudotp_is; 6903 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 6904 IS row,col; 6905 const PetscInt *gidxs; 6906 PetscInt n,st,M,N; 6907 6908 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 6909 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 6910 st = st-n; 6911 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 6912 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 6913 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 6914 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6915 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 6916 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 6917 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 6918 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 6919 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 6920 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 6921 ierr = ISDestroy(&row);CHKERRQ(ierr); 6922 ierr = ISDestroy(&col);CHKERRQ(ierr); 6923 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 6924 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 6925 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 6926 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 6927 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 6928 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 6929 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 6930 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6931 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 6932 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 6933 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 6934 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 6935 } 6936 } 6937 6938 /* propagate symmetry info of coarse matrix */ 6939 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 6940 if (pc->pmat->symmetric_set) { 6941 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 6942 } 6943 if (pc->pmat->hermitian_set) { 6944 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 6945 } 6946 if (pc->pmat->spd_set) { 6947 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 6948 } 6949 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 6950 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 6951 } 6952 /* set operators */ 6953 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 6954 if (pcbddc->dbg_flag) { 6955 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 6956 } 6957 } 6958 ierr = PetscFree(isarray);CHKERRQ(ierr); 6959 #if 0 6960 { 6961 PetscViewer viewer; 6962 char filename[256]; 6963 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 6964 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 6965 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6966 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 6967 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 6968 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 6969 } 6970 #endif 6971 6972 if (pcbddc->coarse_ksp) { 6973 Vec crhs,csol; 6974 6975 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 6976 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 6977 if (!csol) { 6978 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 6979 } 6980 if (!crhs) { 6981 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 6982 } 6983 } 6984 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 6985 6986 /* compute null space for coarse solver if the benign trick has been requested */ 6987 if (pcbddc->benign_null) { 6988 6989 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 6990 for (i=0;i<pcbddc->benign_n;i++) { 6991 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6992 } 6993 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 6994 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 6995 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6996 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6997 if (coarse_mat) { 6998 Vec nullv; 6999 PetscScalar *array,*array2; 7000 PetscInt nl; 7001 7002 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7003 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7004 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7005 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7006 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7007 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7008 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7009 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7010 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7011 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7012 } 7013 } 7014 7015 if (pcbddc->coarse_ksp) { 7016 PetscBool ispreonly; 7017 7018 if (CoarseNullSpace) { 7019 PetscBool isnull; 7020 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7021 if (isnull) { 7022 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7023 } 7024 /* TODO: add local nullspaces (if any) */ 7025 } 7026 /* setup coarse ksp */ 7027 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7028 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7029 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7030 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7031 KSP check_ksp; 7032 KSPType check_ksp_type; 7033 PC check_pc; 7034 Vec check_vec,coarse_vec; 7035 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7036 PetscInt its; 7037 PetscBool compute_eigs; 7038 PetscReal *eigs_r,*eigs_c; 7039 PetscInt neigs; 7040 const char *prefix; 7041 7042 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7043 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7044 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7045 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7046 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7047 /* prevent from setup unneeded object */ 7048 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7049 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7050 if (ispreonly) { 7051 check_ksp_type = KSPPREONLY; 7052 compute_eigs = PETSC_FALSE; 7053 } else { 7054 check_ksp_type = KSPGMRES; 7055 compute_eigs = PETSC_TRUE; 7056 } 7057 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7058 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7059 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7060 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7061 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7062 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7063 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7064 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7065 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7066 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7067 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7068 /* create random vec */ 7069 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7070 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7071 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7072 /* solve coarse problem */ 7073 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7074 /* set eigenvalue estimation if preonly has not been requested */ 7075 if (compute_eigs) { 7076 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7077 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7078 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7079 if (neigs) { 7080 lambda_max = eigs_r[neigs-1]; 7081 lambda_min = eigs_r[0]; 7082 if (pcbddc->use_coarse_estimates) { 7083 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 7084 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 7085 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 7086 } 7087 } 7088 } 7089 } 7090 7091 /* check coarse problem residual error */ 7092 if (pcbddc->dbg_flag) { 7093 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 7094 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7095 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 7096 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7097 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7098 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 7099 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 7100 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 7101 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 7102 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 7103 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 7104 if (CoarseNullSpace) { 7105 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 7106 } 7107 if (compute_eigs) { 7108 PetscReal lambda_max_s,lambda_min_s; 7109 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 7110 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 7111 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 7112 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 7113 for (i=0;i<neigs;i++) { 7114 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 7115 } 7116 } 7117 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 7118 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 7119 } 7120 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 7121 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 7122 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 7123 if (compute_eigs) { 7124 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 7125 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 7126 } 7127 } 7128 } 7129 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 7130 /* print additional info */ 7131 if (pcbddc->dbg_flag) { 7132 /* waits until all processes reaches this point */ 7133 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 7134 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 7135 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7136 } 7137 7138 /* free memory */ 7139 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 7140 PetscFunctionReturn(0); 7141 } 7142 7143 #undef __FUNCT__ 7144 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 7145 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 7146 { 7147 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 7148 PC_IS* pcis = (PC_IS*)pc->data; 7149 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7150 IS subset,subset_mult,subset_n; 7151 PetscInt local_size,coarse_size=0; 7152 PetscInt *local_primal_indices=NULL; 7153 const PetscInt *t_local_primal_indices; 7154 PetscErrorCode ierr; 7155 7156 PetscFunctionBegin; 7157 /* Compute global number of coarse dofs */ 7158 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 7159 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 7160 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 7161 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7162 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 7163 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 7164 ierr = ISDestroy(&subset);CHKERRQ(ierr); 7165 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 7166 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 7167 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 7168 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 7169 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7170 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 7171 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 7172 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 7173 7174 /* check numbering */ 7175 if (pcbddc->dbg_flag) { 7176 PetscScalar coarsesum,*array,*array2; 7177 PetscInt i; 7178 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 7179 7180 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7181 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7182 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 7183 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7184 /* counter */ 7185 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7186 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 7187 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7188 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7189 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7190 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7191 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 7192 for (i=0;i<pcbddc->local_primal_size;i++) { 7193 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 7194 } 7195 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7196 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7197 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7198 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7199 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7200 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7201 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7202 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7203 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7204 for (i=0;i<pcis->n;i++) { 7205 if (array[i] != 0.0 && array[i] != array2[i]) { 7206 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 7207 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 7208 set_error = PETSC_TRUE; 7209 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 7210 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 7211 } 7212 } 7213 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 7214 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7215 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7216 for (i=0;i<pcis->n;i++) { 7217 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 7218 } 7219 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7220 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 7221 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7222 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7223 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 7224 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 7225 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 7226 PetscInt *gidxs; 7227 7228 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 7229 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 7230 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 7231 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7232 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 7233 for (i=0;i<pcbddc->local_primal_size;i++) { 7234 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 7235 } 7236 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7237 ierr = PetscFree(gidxs);CHKERRQ(ierr); 7238 } 7239 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7240 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7241 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 7242 } 7243 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 7244 /* get back data */ 7245 *coarse_size_n = coarse_size; 7246 *local_primal_indices_n = local_primal_indices; 7247 PetscFunctionReturn(0); 7248 } 7249 7250 #undef __FUNCT__ 7251 #define __FUNCT__ "PCBDDCGlobalToLocal" 7252 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 7253 { 7254 IS localis_t; 7255 PetscInt i,lsize,*idxs,n; 7256 PetscScalar *vals; 7257 PetscErrorCode ierr; 7258 7259 PetscFunctionBegin; 7260 /* get indices in local ordering exploiting local to global map */ 7261 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 7262 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 7263 for (i=0;i<lsize;i++) vals[i] = 1.0; 7264 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7265 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 7266 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 7267 if (idxs) { /* multilevel guard */ 7268 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 7269 } 7270 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 7271 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 7272 ierr = PetscFree(vals);CHKERRQ(ierr); 7273 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 7274 /* now compute set in local ordering */ 7275 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7276 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7277 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7278 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 7279 for (i=0,lsize=0;i<n;i++) { 7280 if (PetscRealPart(vals[i]) > 0.5) { 7281 lsize++; 7282 } 7283 } 7284 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 7285 for (i=0,lsize=0;i<n;i++) { 7286 if (PetscRealPart(vals[i]) > 0.5) { 7287 idxs[lsize++] = i; 7288 } 7289 } 7290 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 7291 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 7292 *localis = localis_t; 7293 PetscFunctionReturn(0); 7294 } 7295 7296 #undef __FUNCT__ 7297 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 7298 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 7299 { 7300 PC_IS *pcis=(PC_IS*)pc->data; 7301 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7302 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 7303 Mat S_j; 7304 PetscInt *used_xadj,*used_adjncy; 7305 PetscBool free_used_adj; 7306 PetscErrorCode ierr; 7307 7308 PetscFunctionBegin; 7309 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 7310 free_used_adj = PETSC_FALSE; 7311 if (pcbddc->sub_schurs_layers == -1) { 7312 used_xadj = NULL; 7313 used_adjncy = NULL; 7314 } else { 7315 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 7316 used_xadj = pcbddc->mat_graph->xadj; 7317 used_adjncy = pcbddc->mat_graph->adjncy; 7318 } else if (pcbddc->computed_rowadj) { 7319 used_xadj = pcbddc->mat_graph->xadj; 7320 used_adjncy = pcbddc->mat_graph->adjncy; 7321 } else { 7322 PetscBool flg_row=PETSC_FALSE; 7323 const PetscInt *xadj,*adjncy; 7324 PetscInt nvtxs; 7325 7326 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7327 if (flg_row) { 7328 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 7329 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 7330 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 7331 free_used_adj = PETSC_TRUE; 7332 } else { 7333 pcbddc->sub_schurs_layers = -1; 7334 used_xadj = NULL; 7335 used_adjncy = NULL; 7336 } 7337 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 7338 } 7339 } 7340 7341 /* setup sub_schurs data */ 7342 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7343 if (!sub_schurs->schur_explicit) { 7344 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 7345 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7346 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 7347 } else { 7348 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 7349 PetscBool isseqaij,need_change = PETSC_FALSE; 7350 PetscInt benign_n; 7351 Mat change = NULL; 7352 Vec scaling = NULL; 7353 IS change_primal = NULL; 7354 7355 if (!pcbddc->use_vertices && reuse_solvers) { 7356 PetscInt n_vertices; 7357 7358 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 7359 reuse_solvers = (PetscBool)!n_vertices; 7360 } 7361 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 7362 if (!isseqaij) { 7363 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 7364 if (matis->A == pcbddc->local_mat) { 7365 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 7366 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7367 } else { 7368 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 7369 } 7370 } 7371 if (!pcbddc->benign_change_explicit) { 7372 benign_n = pcbddc->benign_n; 7373 } else { 7374 benign_n = 0; 7375 } 7376 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 7377 We need a global reduction to avoid possible deadlocks. 7378 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 7379 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 7380 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 7381 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7382 need_change = (PetscBool)(!need_change); 7383 } 7384 /* If the user defines additional constraints, we import them here. 7385 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 7386 if (need_change) { 7387 PC_IS *pcisf; 7388 PC_BDDC *pcbddcf; 7389 PC pcf; 7390 7391 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 7392 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 7393 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 7394 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 7395 /* hacks */ 7396 pcisf = (PC_IS*)pcf->data; 7397 pcisf->is_B_local = pcis->is_B_local; 7398 pcisf->vec1_N = pcis->vec1_N; 7399 pcisf->BtoNmap = pcis->BtoNmap; 7400 pcisf->n = pcis->n; 7401 pcisf->n_B = pcis->n_B; 7402 pcbddcf = (PC_BDDC*)pcf->data; 7403 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 7404 pcbddcf->mat_graph = pcbddc->mat_graph; 7405 pcbddcf->use_faces = PETSC_TRUE; 7406 pcbddcf->use_change_of_basis = PETSC_TRUE; 7407 pcbddcf->use_change_on_faces = PETSC_TRUE; 7408 pcbddcf->use_qr_single = PETSC_TRUE; 7409 pcbddcf->fake_change = PETSC_TRUE; 7410 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 7411 /* store information on primal vertices and change of basis (in local numbering) */ 7412 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 7413 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 7414 change = pcbddcf->ConstraintMatrix; 7415 pcbddcf->ConstraintMatrix = NULL; 7416 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 7417 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 7418 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 7419 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 7420 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 7421 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 7422 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 7423 pcf->ops->destroy = NULL; 7424 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 7425 } 7426 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 7427 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 7428 ierr = MatDestroy(&change);CHKERRQ(ierr); 7429 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 7430 } 7431 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7432 7433 /* free adjacency */ 7434 if (free_used_adj) { 7435 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 7436 } 7437 PetscFunctionReturn(0); 7438 } 7439 7440 #undef __FUNCT__ 7441 #define __FUNCT__ "PCBDDCInitSubSchurs" 7442 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 7443 { 7444 PC_IS *pcis=(PC_IS*)pc->data; 7445 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7446 PCBDDCGraph graph; 7447 PetscErrorCode ierr; 7448 7449 PetscFunctionBegin; 7450 /* attach interface graph for determining subsets */ 7451 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 7452 IS verticesIS,verticescomm; 7453 PetscInt vsize,*idxs; 7454 7455 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7456 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 7457 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7458 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 7459 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 7460 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 7461 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 7462 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 7463 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 7464 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 7465 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 7466 } else { 7467 graph = pcbddc->mat_graph; 7468 } 7469 /* print some info */ 7470 if (pcbddc->dbg_flag) { 7471 IS vertices; 7472 PetscInt nv,nedges,nfaces; 7473 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 7474 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7475 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 7476 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7477 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 7478 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 7479 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 7480 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 7481 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7482 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 7483 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 7484 } 7485 7486 /* sub_schurs init */ 7487 if (!pcbddc->sub_schurs) { 7488 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 7489 } 7490 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 7491 7492 /* free graph struct */ 7493 if (pcbddc->sub_schurs_rebuild) { 7494 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 7495 } 7496 PetscFunctionReturn(0); 7497 } 7498 7499 #undef __FUNCT__ 7500 #define __FUNCT__ "PCBDDCCheckOperator" 7501 PetscErrorCode PCBDDCCheckOperator(PC pc) 7502 { 7503 PC_IS *pcis=(PC_IS*)pc->data; 7504 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 7505 PetscErrorCode ierr; 7506 7507 PetscFunctionBegin; 7508 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 7509 IS zerodiag = NULL; 7510 Mat S_j,B0_B=NULL; 7511 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 7512 PetscScalar *p0_check,*array,*array2; 7513 PetscReal norm; 7514 PetscInt i; 7515 7516 /* B0 and B0_B */ 7517 if (zerodiag) { 7518 IS dummy; 7519 7520 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 7521 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 7522 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 7523 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7524 } 7525 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 7526 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 7527 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 7528 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7529 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7530 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7531 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7532 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 7533 /* S_j */ 7534 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 7535 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 7536 7537 /* mimic vector in \widetilde{W}_\Gamma */ 7538 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 7539 /* continuous in primal space */ 7540 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 7541 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7542 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7543 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7544 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 7545 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 7546 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7547 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7548 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7549 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7550 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7551 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7552 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 7553 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 7554 7555 /* assemble rhs for coarse problem */ 7556 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 7557 /* local with Schur */ 7558 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 7559 if (zerodiag) { 7560 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7561 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 7562 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7563 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 7564 } 7565 /* sum on primal nodes the local contributions */ 7566 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7567 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7568 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7569 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7570 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 7571 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 7572 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 7573 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 7574 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7575 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7576 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7577 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 7578 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7579 /* scale primal nodes (BDDC sums contibutions) */ 7580 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 7581 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 7582 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 7583 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 7584 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 7585 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7586 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7587 /* global: \widetilde{B0}_B w_\Gamma */ 7588 if (zerodiag) { 7589 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 7590 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 7591 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 7592 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 7593 } 7594 /* BDDC */ 7595 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 7596 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 7597 7598 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 7599 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 7600 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 7601 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 7602 for (i=0;i<pcbddc->benign_n;i++) { 7603 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 7604 } 7605 ierr = PetscFree(p0_check);CHKERRQ(ierr); 7606 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 7607 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 7608 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 7609 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 7610 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 7611 } 7612 PetscFunctionReturn(0); 7613 } 7614